package b0

  1. Overview
  2. Docs
Software construction and deployment kit

Install

dune-project
 Dependency

Authors

Maintainers

Sources

b0-0.0.6.tbz
sha512=e9aa779e66c08fc763019f16d4706f465d16c05d6400b58fbd0313317ef33ddea51952e2b058db28e65f7ddb7012f328c8bf02d8f1da17bb543348541a2587f0

doc/src/b0.std/b0_text.ml.html

Source file b0_text.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
(*---------------------------------------------------------------------------
   Copyright (c) 2019 The b0 programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Error message helpers. *)

module Err_msg = struct
  let pf = Format.fprintf
  let pp_sp = Format.pp_print_space
  let pp_nop _ () = ()
  let pp_any fmt ppf _ = pf ppf fmt

  let pp_op_enum op ?(empty = pp_nop) pp_v ppf = function
  | [] -> empty ppf ()
  | [v] -> pp_v ppf v
  | _ as vs ->
      let rec loop ppf = function
      | [v0; v1] -> pf ppf "%a@ %s@ %a" pp_v v0 op pp_v v1
      | v :: vs -> pf ppf "%a,@ " pp_v v; loop ppf vs
      | [] -> assert false
      in
      loop ppf vs

  let pp_and_enum ?empty pp_v ppf vs = pp_op_enum "and" ?empty pp_v ppf vs
  let pp_or_enum ?empty pp_v ppf vs = pp_op_enum "or" ?empty pp_v ppf vs
  let pp_did_you_mean pp_v ppf = function
  | [] -> () | vs -> pf ppf "Did@ you@ mean %a ?" (pp_or_enum pp_v) vs

  let pp_must_be pp_v ppf = function
  | [] -> () | vs -> pf ppf "Must be %a." (pp_or_enum pp_v) vs

  let pp_unknown ~kind pp_v ppf v = pf ppf "Unknown %a %a." kind () pp_v v
  let pp_unknown' ~kind pp_v ~hint ppf (v, hints) = match hints with
  | [] -> pp_unknown ~kind pp_v ppf v
  | hints -> pp_unknown ~kind pp_v ppf v; pp_sp ppf (); (hint pp_v) ppf hints

  let min_by f a b = if f a <= f b then a else b
  let max_by f a b = if f a <= f b then b else a

  let edit_distance s0 s1 =
    let minimum a b c = min a (min b c) in
    let s0 = min_by String.length s0 s1     (* row *)
    and s1 = max_by String.length s0 s1 in  (* column *)
    let m = String.length s0 and n = String.length s1 in
    let rec rows row0 row i =
      if i > n then row0.(m) else begin
        row.(0) <- i;
        for j = 1 to m do
          if s0.[j - 1] = s1.[i - 1] then row.(j) <- row0.(j - 1) else
          row.(j) <-minimum (row0.(j - 1) + 1) (row0.(j) + 1) (row.(j - 1) + 1)
        done;
        rows row row0 (i + 1)
      end in
    rows (Array.init (m + 1) (fun x -> x)) (Array.make (m + 1) 0) 1

  let suggest ?(dist = 2) candidates s =
    let add (min, acc) name =
      let d = edit_distance s name in
      if d = min then min, (name :: acc) else
      if d < min then d, [name] else
      min, acc
    in
    let d, suggs = List.fold_left add (max_int, []) candidates in
    if d <= dist (* suggest only if not too far *) then List.rev suggs else []
end

(* Text locations *)

module Tloc = struct
  type fpath = string
  let pp_path = Format.pp_print_string

  type pos = int
  type line = int
  type line_pos = line * pos
  (* For lines we keep the byte position just after the newlinexs. It
     editors are still expecting tools to compute visual columns which
     is stupid.  By keeping these byte positions we can approximate
     columns by subtracting the line byte position from the byte
     location. This will only be correct on US-ASCII data though. Best
     would be to be able to give them [sbyte] and [ebyte]. *)

  let l v = v
  type t =
    { file : fpath;
      sbyte : pos; ebyte : pos;
      sline : pos * line; eline : pos * line }

  let no_file = "-"
  let v ~file ~sbyte ~ebyte ~sline ~eline = { file; sbyte; ebyte; sline; eline }
  let file l = l.file
  let sbyte l = l.sbyte
  let ebyte l = l.ebyte
  let sline l = l.sline
  let eline l = l.eline
  let nil =
    let pnil = -1 in
    let lnil = (-1, pnil) in
    v ~file:no_file ~sbyte:pnil ~ebyte:pnil ~sline:lnil ~eline:lnil

  let merge l0 l1 =
    let sbyte, sline =
      if l0.sbyte < l1.sbyte then l0.sbyte, l0.sline else l1.sbyte, l1.sline
    in
    let ebyte, eline =
      if l0.ebyte < l1.ebyte then l1.ebyte, l1.eline else l0.ebyte, l0.eline
    in
    v ~file:l0.file ~sbyte ~ebyte ~sline ~eline

  let to_start l =
    v ~file:l.file ~sbyte:l.sbyte ~ebyte:l.sbyte ~sline:l.sline ~eline:l.sline

  let to_end l =
    v ~file:l.file ~sbyte:l.ebyte ~ebyte:l.ebyte ~sline:l.eline ~eline:l.eline

  let restart ~at:s e =
    v ~file:e.file ~sbyte:s.sbyte ~ebyte:e.ebyte ~sline:s.sline ~eline:e.eline

  let pf = Format.fprintf
  let pp_ocaml ppf l = match l.ebyte < 0 with
  | true -> pf ppf "File \"%a\", line n/a, characters n/a" pp_path l.file
  | false ->
      let pp_lines ppf l = match fst l.sline = fst l.eline with
      | true -> pf ppf "line %d" (fst l.sline)
      | false -> pf ppf "lines %d-%d" (fst l.sline) (fst l.eline)
      in
      (* "characters" represent positions (insertion points) not columns *)
      let pos_s = l.sbyte - snd l.sline in
      let pos_e = l.ebyte - snd l.eline + 1 in
      pf ppf "File \"%a\", %a, characters %d-%d"
        pp_path l.file pp_lines l pos_s pos_e

  let pp_gnu ppf l = match l.ebyte < 0 with
  | true -> pf ppf "%a:" pp_path l.file
  | false ->
      let pp_lines ppf l =
        let col_s = l.sbyte - snd l.sline + 1 in
        let col_e = l.ebyte - snd l.eline + 1 in
        match fst l.sline = fst l.eline with
        | true ->  pf ppf "%d.%d-%d" (fst l.sline) col_s col_e
        | false ->
            pf ppf "%d.%d-%d.%d" (fst l.sline) col_s (fst l.eline) col_e
      in
      pf ppf "%a:%a" pp_path l.file pp_lines l

  let pp_dump ppf l =
    pf ppf "[bytes %d;%d][lines %d;%d][lbytes %d;%d]"
      l.sbyte l.ebyte (fst l.sline) (fst l.eline) (snd l.sline) (snd l.eline)

  let pp = pp_gnu

  (* Insertions and substitutions *)

  let string_subrange ?(first = 0) ?last s =
    let max = String.length s - 1 in
    let last = match last with
    | None -> max
    | Some l when l > max -> max
    | Some l -> l
    in
    let first = if first < 0 then 0 else first in
    if first > last then "" else
    String.sub s first (last - first + 1)

  let string_replace ~start ~stop ~rep s =
    let len = String.length s in
    if stop < start || start < 0 || start > len || stop < 0 || stop > len
    then invalid_arg (Printf.sprintf "invalid start:%d stop:%d" start stop) else
    let b = String.sub s 0 start in
    let a = String.sub s stop (len - stop) in
    String.concat "" [b; rep; a]
end

(* UTF-8 decoding table. *)

module Utf_8 = struct
  type case =
  | L1 | L2 | L3_E0 | L3_E1_EC_or_EE_EF | L3_ED | L4_F0 | L4_F1_F3 | L4_F4 | E

  let case =
(*
  (* See https://tools.ietf.org/html/rfc3629#section-4 *)
  Printf.printf "[|";
  for i = 0 to 255 do
    if i mod 16 = 0 then Printf.printf "\n";
    if 0x00 <= i && i <= 0x7F then Printf.printf "L1; " else
    if 0xC2 <= i && i <= 0xDF then Printf.printf "L2; " else
    if 0xE0 = i then Printf.printf "L3_E0; " else
    if 0xE1 <= i && i <= 0xEC || 0xEE <= i && i <= 0xEF
    then Printf.printf "L3_E1_EC_or_EE_EF; " else
    if 0xED = i then Printf.printf "L3_ED;" else
    if 0xF0 = i then Printf.printf "L4_F0; " else
    if 0xF1 <= i && i <= 0xF3 then Printf.printf "L4_F1_F3; " else
    if 0xF4 = i then Printf.printf "L4_F4; " else
    Printf.printf "E; "
  done;
  Printf.printf "\n|]"
*)
  [|
    L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
    L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
    L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
    L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
    L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
    L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
    L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
    L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
    E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
    E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
    E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
    E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
    E; E; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
    L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
    L3_E0; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
    L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
    L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
    L3_E1_EC_or_EE_EF; L3_ED;L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
    L4_F0; L4_F1_F3; L4_F1_F3; L4_F1_F3; L4_F4; E; E; E; E; E; E; E; E; E; E; E;
  |]
end

(* UTF-8 text decoder *)

module Tdec = struct
  type 'a fmt = Format.formatter -> 'a -> unit
  let pp_did_you_mean = Err_msg.pp_did_you_mean
  let pp_and_enum = Err_msg.pp_and_enum
  let pp_or_enum = Err_msg.pp_or_enum
  let pp_did_you_mean = Err_msg.pp_did_you_mean
  let pp_must_be = Err_msg.pp_must_be
  let pp_unknown = Err_msg.pp_unknown
  let pp_unknown' = Err_msg.pp_unknown'

  (* Decoders *)

  type t =
    { file : Tloc.fpath; i : string; tok : Buffer.t;
      mutable pos : int; mutable line : int; mutable line_pos : int; }

  let create ?(file = Tloc.no_file) i =
    { file; i; tok = Buffer.create 255; pos = 0; line = 1; line_pos = 0 }

  (* Location *)

  let file d = d.file
  let pos d = d.pos
  let line d = d.line, d.line_pos

  let loc d ~sbyte ~ebyte ~sline ~eline =
    Tloc.v ~file:d.file ~sbyte ~ebyte ~sline ~eline

  let loc_to_here d ~sbyte ~sline =
    loc d ~sbyte ~ebyte:d.pos ~sline ~eline:(d.line, d.line_pos)

  let loc_here d = loc_to_here d ~sbyte:d.pos ~sline:(d.line, d.line_pos)

  (* Errors *)

  exception Err of Tloc.t * string

  let err loc msg = raise_notrace (Err (loc, msg))
  let err_to_here d ~sbyte ~sline fmt =
    Format.kasprintf (err (loc_to_here d ~sbyte ~sline)) fmt

  let err_here d fmt = Format.kasprintf (err (loc_here d)) fmt
  let err_suggest = Err_msg.suggest

  (* Lexing *)

  let incr_line d = match d.i.[d.pos] with (* assert (not (eoi d)) *)
  | '\r' -> d.line <- d.line + 1; d.line_pos <- d.pos + 1
  | '\n' ->
      (if d.pos = 0 || d.i.[d.pos - 1] <> '\r' then d.line <- d.line + 1);
      d.line_pos <- d.pos + 1;
  | _ -> ()
  [@@ ocaml.inline]

  let eoi d = d.pos >= String.length d.i [@@ ocaml.inline]
  let byte d = if eoi d then 0xFFFF else Char.code d.i.[d.pos] [@@ ocaml.inline]
  let accept_byte d = incr_line d; d.pos <- d.pos + 1
  [@@ ocaml.inline]

  let accept_utf_8 accept d =
    let err d = match byte d with
    | 0xFFFF -> err_here d "UTF-8 decoding error: unexpected end of input"
    | b -> err_here d "UTF-8 decoding error: byte %02x illegal here" b
    in
    let accept_tail d = if (byte d lsr 6 = 0b10) then accept d else err d in
    match byte d with
    | 0xFFFF -> err d
    | b ->
        (* If a subsequent [byte d] invocation is 0xFFFF we get to [err]. *)
        match Utf_8.case.(b) with
        | L1 -> accept d
        | L2 -> accept d; accept_tail d
        | L3_E0 ->
            accept d;
            if (byte d - 0xA0 < 0xBF - 0xA0) then accept d else err d;
            accept_tail d
        | L3_E1_EC_or_EE_EF -> accept d; accept_tail d; accept_tail d
        | L3_ED ->
            accept d;
            if (byte d - 0x80 < 0x9F - 0x80) then accept d else err d;
            accept_tail d
        | L4_F0 ->
            accept d;
            if (byte d - 0x90 < 0xBF - 0x90) then accept d else err d;
            accept_tail d; accept_tail d
        | L4_F1_F3 ->
            accept d;
            accept_tail d; accept_tail d; accept_tail d;
        | L4_F4 ->
            accept d;
            if (byte d - 0x80 < 0x8F - 0x80) then accept d else err d;
        | E -> err d

  let accept_uchar d = accept_utf_8 accept_byte d

  (* Tokenizer *)

  let tok_reset d = Buffer.reset d.tok [@@ ocaml.inline]
  let tok_pop d = let t = Buffer.contents d.tok in tok_reset d; t
  [@@ ocaml.inline]

  let tok_accept_byte d =
    Buffer.add_char d.tok d.i.[d.pos]; accept_byte d; [@@ ocaml.inline]

  let tok_accept_uchar d = accept_utf_8 tok_accept_byte d [@@ ocaml.inline]
  let tok_add_byte d b = Buffer.add_char d.tok (Char.chr b) [@@ ocaml.inline]
  let tok_add_bytes d s = Buffer.add_string d.tok s [@@ ocaml.inline]
  let tok_add_char d c = Buffer.add_char d.tok c [@@ ocaml.inline]

  let buffer_add_uchar b u = match Uchar.to_int u with
  (* XXX From 4.06 use Buffer.add_utf_8_uchar *)
  | u when u < 0 -> assert false
  | u when u <= 0x007F ->
      Buffer.add_char b (Char.unsafe_chr u)
  | u when u <= 0x07FF ->
      Buffer.add_char b (Char.unsafe_chr (0xC0 lor (u lsr 6)));
      Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)));
  | u when u <= 0xFFFF ->
      Buffer.add_char b (Char.unsafe_chr (0xE0 lor (u lsr 12)));
      Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F)));
      Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)));
  | u when u <= 0x10FFFF ->
      Buffer.add_char b (Char.unsafe_chr (0xF0 lor (u lsr 18)));
      Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 12) land 0x3F)));
      Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F)));
      Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)))
  | _ -> assert false

  let tok_add_uchar d u = buffer_add_uchar d.tok u
end