Source file preprocessor.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
(* A tokenizer yields one token at a time, together with its position. *)
type tokenizer = unit -> Parser.token * Term_base.parsed_pos

(* Turn a sedlex buffer into a [tokenizer] over the raw lexer, rewriting the
   preprocessor-only string/regexp tokens — which carry their own positions —
   into their parser-level counterparts. *)
let mk_tokenizer ?(fname = "") lexbuf =
  Sedlexing.set_filename lexbuf fname;
  fun () ->
    let tok = Lexer.token lexbuf in
    match tok with
      | Parser.PP_STRING (sep, s, pos) -> (Parser.STRING (sep, s), pos)
      | Parser.PP_REGEXP (re, flags, pos) -> (Parser.REGEXP (re, flags), pos)
      | other -> (other, Sedlexing.lexing_bytes_positions lexbuf)
(* One element of an interpolated string's expansion: a literal chunk, an
   embedded expression (exposed as a tokenizer over its source text), or the
   end-of-interpolation marker. *)
type exp_item = String of string | Expr of tokenizer | End

(* Internal short-circuit for the scan in [is_interpolating]. *)
exception Found_interpolation

(** Expand string interpolations: a [STRING] token whose contents contain
    [#{...}] escapes is re-emitted as [BEGIN_INTERPOLATION], then a stream of
    [INTERPOLATED_STRING] chunks and embedded-expression tokens, terminated by
    [END_INTERPOLATION]. Strings without any embedded expression pass through
    unchanged. *)
let expand_string ?fname tokenizer =
  (* Pending [exp_item]s (paired with the string's position), produced by
     [parse] below and drained by [token]. *)
  let state = Queue.create () in
  let add pos x = Queue.add (x, pos) state in
  let pop () = ignore (Queue.take state) in
  let clear () = Queue.clear state in
  (* [true] iff the queued items contain at least one embedded expression. *)
  let is_interpolating () =
    try
      Queue.iter
        (function Expr _, _ -> raise Found_interpolation | _ -> ())
        state;
      false
    with Found_interpolation -> true
  in
  (* Split [s] on [#{...}] escapes and enqueue the resulting items. [sep] is
     the quote character of the original literal, needed to unescape the
     embedded expression's source text before re-lexing it. *)
  let parse ~sep s pos =
    let rex = Re.Pcre.regexp "#\\{([^}]*)\\}" in
    let l = Re.Pcre.full_split ~rex s in
    (* [full_split] returns [] on the empty string: keep one empty chunk so
       that [End] is still preceded by a [String] item. *)
    let l = if l = [] then [Re.Pcre.Text s] else l in
    let add = add pos in
    let rec parse = function
      | Re.Pcre.Group (_, x) :: l ->
          let x = Lexer.render_string ~pos ~sep x in
          let lexbuf = Sedlexing.Utf8.from_string x in
          let tokenizer = mk_tokenizer ?fname lexbuf in
          (* Every token of the embedded expression is reported at the
             position of the enclosing string literal. *)
          let tokenizer () = (fst (tokenizer ()), pos) in
          add (Expr tokenizer);
          parse l
      | Re.Pcre.Text x :: l ->
          add (String x);
          parse l
      | Re.Pcre.NoGroup :: l | Re.Pcre.Delim _ :: l -> parse l
      | [] -> add End
    in
    parse l
  in
  let rec token () =
    if Queue.is_empty state then (
      match tokenizer () with
        | (Parser.STRING (sep, s), pos) as tok ->
            parse ~sep s pos;
            if is_interpolating () then (Parser.BEGIN_INTERPOLATION sep, pos)
            else (
              (* No [#{...}] inside: discard the queued items and emit the
                 plain string token unchanged. *)
              clear ();
              tok)
        | x -> x)
    else (
      let el, pos = Queue.peek state in
      match el with
        | String s ->
            pop ();
            (Parser.INTERPOLATED_STRING s, pos)
        | Expr tokenizer -> (
            match tokenizer () with
              | Parser.EOF, _ ->
                  (* Embedded expression exhausted: advance to the next
                     queued item. *)
                  pop ();
                  token ()
              | x, _ -> (x, pos))
        | End ->
            pop ();
            (Parser.END_INTERPOLATION, pos))
  in
  token
(** Special token in order to avoid 3.{s = "a"} to be parsed as a float
    followed by a record. The lexer emits [PP_INT_DOT_LCUR n] for the three
    characters [N.{]; this pass re-expands it into [INT n], [DOT], [LCUR]
    with their individual positions. *)
let int_meth tokenizer =
  let q = Queue.create () in
  let fill () =
    match tokenizer () with
      | Parser.PP_INT_DOT_LCUR n, (spos, epos) ->
          (* [a k pos] moves [pos] back by [k] characters. *)
          let a n pos =
            { pos with Lexing.pos_cnum = pos.Lexing.pos_cnum - n }
          in
          (* The combined token spans "N.{" and ends at [epos], so the "."
             occupies [epos-2, epos-1) and the "{" occupies [epos-1, epos).
             The sub-token positions are therefore derived from [epos] (using
             [spos] would place DOT/LCUR before the integer itself). *)
          Queue.add_seq q
            (List.to_seq
               [
                 (Parser.INT n, (spos, a 2 epos));
                 (Parser.DOT, (a 2 epos, a 1 epos));
                 (Parser.LCUR, (a 1 epos, epos));
               ])
      | t -> Queue.add t q
  in
  let token () =
    if Queue.is_empty q then fill ();
    Queue.pop q
  in
  token
(* Split a [DOTVAR v] token (".foo" lexed as a single token) into [DOT]
   followed by [VAR v], both reported at the original position. *)
let dotvar tokenizer =
  (* Holds the [VAR] token to emit on the call after a [DOTVAR] split. *)
  let pending = ref None in
  let token () =
    match !pending with
      | Some tok ->
          pending := None;
          tok
      | None -> (
          match tokenizer () with
            | Parser.DOTVAR v, pos ->
                (* Emit [DOT] now and keep [VAR v] for the next call. *)
                pending := Some (Parser.VAR v, pos);
                (Parser.DOT, pos)
            | tok -> tok)
  in
  token
(** Change MINUS to UMINUS if the minus is not preceded by a number (or an
    expression which could produce a number). *)
let uminus tokenizer =
  (* Whether the previous token could terminate a numeric expression, in
     which case a following minus is binary subtraction. *)
  let after_value = ref false in
  let token () =
    let tok = tokenizer () in
    match tok with
      | Parser.INT _, _
      | Parser.FLOAT _, _
      | Parser.VAR _, _
      | Parser.RPAR, _
      | Parser.RCUR, _ ->
          after_value := true;
          tok
      | Parser.MINUS, pos when not !after_value ->
          (* [after_value] is already false here, so no store is needed. *)
          (Parser.UMINUS, pos)
      | _ ->
          after_value := false;
          tok
  in
  token
(* Drop [PP_ENDL] tokens, and fuse a variable immediately followed by "(" or
   "[" into the dedicated [VARLPAR]/[VARLBRA] tokens (so that application and
   indexing can be distinguished from a parenthesized expression on the next
   line). Uses a one-token lookahead stored in [state]. *)
let strip_newlines tokenizer =
  (* [Some tok]: a token has been read ahead and not yet emitted. *)
  let state = ref None in
  let rec token () =
    (* [v] is the buffered variable token (name [var]); peek at the next
       token to decide whether to fuse it. *)
    let inject_varlpar var v =
      match tokenizer () with
        | Parser.LPAR, (_, endp) ->
            state := None;
            (* The fused token spans from the variable's start to ")". *)
            let startp = fst (snd v) in
            (Parser.VARLPAR var, (startp, endp))
        (* "in [...]" is list membership, not indexing: don't fuse. *)
        | Parser.LBRA, (_, endp) when var <> "in" ->
            state := None;
            let startp = fst (snd v) in
            (Parser.VARLBRA var, (startp, endp))
        | Parser.PP_ENDL, _ ->
            (* Newline breaks the fusion: emit the variable alone. *)
            state := None;
            v
        | x ->
            (* Not "(" or "[": emit the variable, buffer the new token. *)
            state := Some x;
            v
    in
    match !state with
      | None -> (
          match tokenizer () with
            | Parser.PP_ENDL, _ -> token ()
            | (Parser.VAR _, _) as v ->
                (* Buffer the variable and recurse so the next call peeks. *)
                state := Some v;
                token ()
            | x -> x)
      | Some ((Parser.VAR var, _) as v) -> inject_varlpar var v
      (* An UNDERSCORE can end up buffered via the [Some x] case above; it
         fuses like a variable named "_". *)
      | Some ((Parser.UNDERSCORE, _) as v) -> inject_varlpar "_" v
      | Some x ->
          state := None;
          x
  in
  token
(** Build the full preprocessing tokenizer: raw lexing, then string
    interpolation, [N.{] splitting, [DOTVAR] splitting, unary-minus
    detection and newline stripping. The result yields
    [(token, start, end)] triples as expected by the parser entry points. *)
let mk_tokenizer ?fname lexbuf =
  let pipeline =
    strip_newlines
      (uminus
         (dotvar (int_meth (expand_string ?fname (mk_tokenizer ?fname lexbuf)))))
  in
  fun () ->
    let tok, (startp, endp) = pipeline () in
    (tok, startp, endp)