Source file unstrctrd.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
(** One lexical element of an unstructured value. *)
type elt =
[ `Uchar of Uchar.t
(* a Unicode character; [to_utf_8_string] writes it as UTF-8 *)
| `WSP of wsp
(* white-space text, written back verbatim by [to_utf_8_string] *)
| `LF
(* a lone line feed, written as '\n' *)
| `CR
(* a lone carriage return, written as '\r' *)
| `FWS of wsp
(* folding white space: written as "\r\n" followed by the carried text *)
| `d0
(* the NUL byte, written as '\000' *)
| `OBS_NO_WS_CTL of obs
(* a single byte written verbatim; presumably an obsolete control character
   as the name suggests -- confirm against the lexer *)
| `Invalid_char of invalid_char
(* a byte that [to_utf_8_string] replaces with its [rep] character *) ]
(* Payload of [`WSP] and [`FWS]. *)
and wsp = string
(* Payload of [`OBS_NO_WS_CTL]. *)
and obs = char
(* Payload of [`Invalid_char]. *)
and invalid_char = char
(** An unstructured value: the ordered list of its elements. *)
type t = elt list
(** Errors carry a human-readable message only. *)
type error = [ `Msg of string ]
(* [invalid_arg fmt ...] builds a message printf-style from [fmt] and its
   arguments, then raises [Invalid_argument] carrying that message. It
   shadows the standard, string-only [invalid_arg] for the rest of the file. *)
let invalid_arg fmt =
  (* Not recursive: the inner [invalid_arg] is still the standard one. *)
  let raise_with msg = invalid_arg msg in
  Format.kasprintf raise_with fmt
(* [error_msgf fmt ...] builds a message printf-style from [fmt] and its
   arguments and returns it as [Error (`Msg message)]. *)
let error_msgf fmt =
  let wrap msg = Error (`Msg msg) in
  Format.kasprintf wrap fmt
(* The value that contains no element. *)
let empty = []

(* [length lst] is the number of elements held by [lst]. *)
let length lst = List.length lst
(** [of_string str] lexes [str] with the [unstructured] rule of the lexer
    and hands the lexed list to [Pp.pp].

    On success the result is [Ok (n, res)], where [n] is the sum of the
    lexbuf's [lex_abs_pos] and [lex_curr_pos] once lexing is done and [res]
    is whatever [Pp.pp] passes to its continuation. A failure reported by the
    lexer is returned as [Error (`Msg _)]; any exception raised along the way
    is turned into [Error (`Msg "Unterminated input")]. *)
let of_string str =
(* Buffer implementation given to [Lexer.Make]: plain [bytes]. *)
let module B = struct
type t = bytes
let blit_to_bytes = Bytes.blit
(* 4096-byte scratch buffer; presumably the one the lexer asks [read] to
   fill -- confirm against [Lexer.BUFFER]. *)
let buf = Bytes.create 4096
end in
(* Monad given to [Lexer.Make]: a computation is either finished ([Done]),
   failed ([Fail]) or waiting for more input ([Read]). *)
let module M = struct
type 'a t =
| Read of { buffer : Bytes.t; continue : int -> 'a t }
| Fail of string
| Done of 'a
type buffer = bytes
let return x = Done x
(* [bind] threads [f] through pending reads so that it runs once the
   computation finally reaches [Done]; [Fail] short-circuits. *)
let rec bind : 'a t -> ('a -> 'b t) -> 'b t = fun x f -> match x with
| Read { buffer; continue; } ->
let continue len = bind (continue len) f in
Read { buffer; continue; }
| Fail err -> Fail err
| Done x -> f x
let fail err = Fail err
let read k buf = Read { buffer= buf; continue= k; }
end in
(* The lexbuf must be created before [Lexer] is shadowed by the functor
   application on the next line. *)
let lexbuf = Lexer.make () in
let module Lexer = Lexer.Make(B)(M) in
(* Offset in [str] of the next byte to hand to the lexer. *)
let pos = ref 0 in
(* Drives the lexer until it is done or has failed, serving every [Read]
   request from [str]. *)
let rec go = function
| M.Done lst ->
let k res = Ok (lexbuf.Lexing.lex_abs_pos + lexbuf.Lexing.lex_curr_pos, res) in
Pp.pp k lst
| M.Fail err -> Error (`Msg err)
| M.Read { buffer; continue; } ->
(* Copy as much of the remaining input as fits in [buffer]; [len] is 0 once
   [str] is exhausted. *)
let len = min (String.length str - !pos) (Bytes.length buffer) in
Bytes.blit_string str !pos buffer 0 len ; pos := !pos + len ;
go (continue len) in
(* Every exception, whatever its cause, is reported as unterminated input. *)
try go (Lexer.unstructured [] lexbuf)
with _exn -> Error (`Msg "Unterminated input")
(* [safely_decode str] appends "\r\n" to [str], parses the result with
   [of_string] and returns the payload of its [Ok] case.
   Raises [Invalid_argument] with the parser's message when parsing fails. *)
let safely_decode str =
  let terminated = str ^ "\r\n" in
  match of_string terminated with
  | Error (`Msg err) -> invalid_arg "%s" err
  | Ok v -> v
(* [to_utf_8_string ?rep lst] serialises the elements of [lst], in order,
   into a string. Unicode characters are written as UTF-8, [`Invalid_char]
   elements are replaced by [rep] (default [Uutf.u_rep]), [`FWS] is written
   as "\r\n" followed by its white space, and every other element is written
   as the bytes it stands for. *)
let to_utf_8_string ?(rep = Uutf.u_rep) lst =
  let out = Buffer.create (List.length lst) in
  let crlf () = Buffer.add_string out "\r\n" in
  let emit elt =
    match elt with
    | `Uchar uchar -> Uutf.Buffer.add_utf_8 out uchar
    | `Invalid_char _ -> Uutf.Buffer.add_utf_8 out rep
    | `OBS_NO_WS_CTL chr -> Buffer.add_char out chr
    | `d0 -> Buffer.add_char out '\000'
    | `CR -> Buffer.add_char out '\r'
    | `LF -> Buffer.add_char out '\n'
    | `CRLF -> crlf ()
    | `WSP wsp -> Buffer.add_string out wsp
    | `FWS wsp -> crlf () ; Buffer.add_string out wsp
  in
  List.iter emit lst ;
  Buffer.contents out
(* The backslash character (code point 0x5c) as a [Uchar.t]. *)
let escape_uchar = Uchar.of_int 0x5c
(* Removes parenthesised comments from a list of elements.

   Returns [Ok elements] with the elements that remain, in their original
   order, or an error with the message "Non-terminating comment" when the
   parenthesis depth is not back to 0 at the end of the input.

   NOTE(review): the header reads [let lst =] while the last line applies
   [go] to [lst], which this definition does not bind. The header looks
   truncated (presumably a function taking [lst]) -- confirm against the
   original file. *)
let lst =
(* [go stack ~escaped ~quoted_string acc rest]
   - [stack]: current parenthesis depth; elements met while it is above 0
     are dropped;
   - [escaped]: the previous element was an unescaped backslash;
   - [quoted_string]: toggled by each unescaped double quote met at depth 0;
   - [acc]: elements kept so far, in reverse order. *)
let rec go stack ~escaped ~quoted_string acc = function
| [] -> if stack = 0 then Ok (List.rev acc) else error_msgf "Non-terminating comment"
| `Uchar uchar as value :: r ->
( match Uchar.to_int uchar with
(* 0x22: double quote. *)
| 0x22 ->
( match escaped, quoted_string, stack with
| true, _, 0 -> go stack ~escaped:false ~quoted_string (value :: acc) r
| true, _, _ -> go stack ~escaped:false ~quoted_string acc r
| false, _, 0 -> go stack ~escaped ~quoted_string:(not quoted_string) (value :: acc) r
| false, false, _ -> go stack ~escaped ~quoted_string acc r
(* [quoted_string] is only ever toggled at depth 0, and parentheses met
   while it is set leave the depth at 0. *)
| false, true, _ -> assert false )
(* 0x28: opening parenthesis. Kept as-is inside a quoted string, otherwise
   it increases the depth and is dropped. *)
| 0x28 ->
( match escaped, quoted_string, stack with
| true, _, 0 -> go stack ~escaped:false ~quoted_string (value :: acc) r
| true, _, _ -> go stack ~escaped:false ~quoted_string acc r
| false, true , 0 -> go 0 ~escaped ~quoted_string (value :: acc) r
| false, false, n -> go (succ n) ~escaped ~quoted_string acc r
| false, true, _ -> assert false )
(* 0x29: closing parenthesis. Kept as-is inside a quoted string, otherwise
   it decreases the depth and is dropped.
   NOTE(review): the depth is decremented even when it is already 0, so it
   can become negative -- confirm this is intended. *)
| 0x29 ->
( match escaped, quoted_string, stack with
| true, _, 0 -> go stack ~escaped:false ~quoted_string (value :: acc) r
| true, _, _ -> go stack ~escaped:false ~quoted_string acc r
| false, true, 0 -> go 0 ~escaped ~quoted_string (value :: acc) r
| false, false, n -> go (pred n) ~escaped ~quoted_string acc r
| false, true, _ -> assert false )
(* 0x5c: backslash. An unescaped one is not stored; it only sets [escaped]
   for the next element. *)
| 0x5c ->
( match escaped, quoted_string, stack with
| true, _, 0 -> go stack ~escaped:false ~quoted_string (value :: acc) r
| true, _, _ -> go stack ~escaped:false ~quoted_string acc r
| false, _, _ -> go stack ~escaped:true ~quoted_string acc r )
(* Any other character: dropped inside a comment. At depth 0, a pending
   backslash is put back in front of the character, since it did not escape
   one of the four characters above. *)
| _ ->
if stack > 0
then go stack ~escaped:false ~quoted_string acc r
else go stack ~escaped:false ~quoted_string (if escaped then value :: `Uchar escape_uchar :: acc else value :: acc) r )
(* Elements other than [`Uchar]: dropped inside a comment, kept otherwise;
   they always clear [escaped]. *)
| value :: r ->
if stack > 0
then go stack ~escaped:false ~quoted_string acc r
else go stack ~escaped:false ~quoted_string (value :: acc) r in
go 0 ~escaped:false ~quoted_string:false [] lst
(* [replace_invalid_bytes ~f t] walks [t] from left to right and calls [f] on
   the byte of every [`Invalid_char] element: [Some v] puts [v] in its place,
   [None] removes the element. All other elements are kept unchanged and the
   order is preserved. *)
let replace_invalid_bytes ~f t =
  let rec go kept = function
    | [] -> List.rev kept
    | `Invalid_char chr :: rest ->
      let kept =
        match f chr with
        | Some v -> v :: kept
        | None -> kept
      in
      go kept rest
    | v :: rest -> go (v :: kept) rest
  in
  go [] t
(* [iter ~f elts] applies [f] to every element of [elts], in order. *)
let iter ~f elts = List.iter f elts

(* [fold ~f init elts] folds [f] over [elts] from left to right, starting
   from [init]. *)
let fold ~f init elts = List.fold_left f init elts

(* [map ~f elts] is the list of the images by [f] of the elements of
   [elts]. *)
let map ~f elts = List.map f elts
(* [wsp ~len] is a [`WSP] element made of [len] space characters. *)
let wsp ~len =
  let spaces = String.make len ' ' in
  `WSP spaces

(* [tab ~len] is a [`WSP] element made of [len] tab characters. *)
let tab ~len =
  let tabs = String.make len '\t' in
  `WSP tabs
(* [fws ?tab indent] is a [`FWS] element whose white space is [indent]
   copies of a tab when [tab] is [true], of a space otherwise (the default).
   Raises [Invalid_argument] when [indent] is zero or negative. *)
let fws ?(tab = false) indent =
  if indent <= 0 then invalid_arg "fws: invalid indent argument" ;
  let filler = if tab then '\t' else ' ' in
  `FWS (String.make indent filler)
(* [split_at ~index l] cuts [l] in two: the first [index] elements and the
   remaining ones, both in their original order.
   Raises [Invalid_argument] when [index] is negative or greater than the
   length of [l]. *)
let split_at ~index l =
  if index < 0 || index > List.length l
  then invalid_arg "split_at: index (%d) is invalid" index ;
  let rec take remaining prefix rest =
    if remaining = 0 then (List.rev prefix, rest)
    else
      match rest with
      | x :: tl -> take (pred remaining) (x :: prefix) tl
      (* Unreachable: [index] was checked against the length above. *)
      | [] -> assert false
  in
  take index [] l
(* [split_on ~on l] looks for the first element of [l] designated by [on] and
   returns [Some (before, after)], the elements on each side of it (the
   element itself is dropped), or [None] when no element matches.

   [`CR], [`LF], [`WSP] and [`FWS] designate the elements of the same name,
   whatever white space they carry. [`Uchar u] designates the character [u].
   [`Char c] designates the character [c], the [`OBS_NO_WS_CTL] element
   carrying [c] and, when [c] is '\000', the [`d0] element. *)
let split_on ~on l =
  (* [hit elt] tells whether [elt] is the separator designated by [on]. *)
  let hit elt =
    match elt, on with
    | `CR, `CR | `LF, `LF | `WSP _, `WSP | `FWS _, `FWS -> true
    | `d0, `Char '\000' -> true
    | `Uchar a, `Uchar b -> Uchar.equal a b
    | `Uchar a, `Char b -> Uchar.equal a (Uchar.of_char b)
    | `OBS_NO_WS_CTL a, `Char b -> Char.equal a b
    | _, _ -> false
  in
  let rec scan before = function
    | [] -> None
    | x :: after when hit x -> Some (List.rev before, after)
    | x :: after -> scan (x :: before) after
  in
  scan [] l
(* [of_list l] checks that [l] never has a [`CR] element immediately followed
   by a [`LF] element. Returns [Ok l] when that holds, an error message
   otherwise. *)
let of_list l =
  let rec has_crlf = function
    | `CR :: `LF :: _ -> true
    | _ :: rest -> has_crlf rest
    | [] -> false
  in
  if has_crlf l
  then error_msgf "of_list: An unexpected CRLF token exists"
  else Ok l
(* [fold_fws t] replaces every run of consecutive [`FWS] elements of [t] by a
   single [`WSP] element carrying the white space of the first [`FWS] of the
   run. All other elements are kept unchanged, in order. *)
let fold_fws t =
  (* [prev_fws] tells whether the element just before was a [`FWS]. *)
  let rec go prev_fws acc = function
    | [] -> List.rev acc
    | `FWS wsp :: rest ->
      if prev_fws then go true acc rest
      else go true (`WSP wsp :: acc) rest
    | x :: rest -> go false (x :: acc) rest
  in
  go false [] t
(* Re-exports, so that the lexer and the post-processing step can be used
   directly through this module. *)

(* Signature of the buffer module expected by [Make]. *)
module type BUFFER = Lexer.BUFFER
(* Signature of the monad module expected by [Make]. *)
module type MONAD = Lexer.MONAD
(* The lexer functor; [of_string] applies it to a [bytes] buffer and a small
   read/fail/done monad. *)
module Make = Lexer.Make
(* Creates the lexbuf to pass to the lexer, as [of_string] does. *)
let lexbuf_make = Lexer.make
(* The post-processing step that [of_string] runs on the lexed list. *)
let post_process = Pp.pp