Source file tyxml_ppx.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
open Tyxml_syntax
open Ppxlib.Asttypes
open Ppxlib.Parsetree
type lang = Common.lang = Html | Svg
let lang_of_ns loc ns =
  if ns = Markup.Ns.html || ns = "" then Common.Html
  else if ns = Markup.Ns.svg then Common.Svg
  else Common.error loc "Unknown namespace %s" ns
module Loc = struct
  (** 0-width locations do not show in the toplevel. We expand them to
      one-width.
  *)
  let one_width ?(ghost=false) pos =
    { Location.loc_ghost = ghost ;
      loc_start = pos ;
      loc_end = {pos with pos_cnum = pos.pos_cnum + 1}
    }
  (** Given a list of input strings for Markup.ml, evaluates to a function that
      converts Markup.ml locations of characters within these strings to their
      OCaml locations. *)
  let make_location_map located_strings =
    
    let starting_a_string = ref (fun _ -> ()) in
    let source =
      let strings = ref located_strings in
      let offset = ref 0 in
      let rec next_byte () = match !strings with
        | [] -> None
        | (s, loc)::rest ->
          if !offset = 0 then !starting_a_string loc;
          if !offset < String.length s then begin
            offset := !offset + 1;
            Some (s.[!offset - 1])
          end
          else begin
            offset := 0;
            strings := rest;
            next_byte ()
          end
      in
      Markup.fn next_byte
    in
    
    let location_map =
      let preprocessed_input_stream, get_markupml_location =
        source
        |> Markup.Encoding.decode Markup.Encoding.utf_8
        |> Markup.preprocess_input_stream
      in
      let location_map = ref [] in
      starting_a_string := begin fun ocaml_position ->
        location_map :=
          (get_markupml_location (), ocaml_position)::!location_map
      end;
      Markup.drain preprocessed_input_stream;
      List.rev !location_map
    in
    
    fun given_markup_location ->
      
      let rec bounded_maximum best = function
        | [] -> best
        | ((noted_markup_location, _) as loc)::rest ->
          if Markup.compare_locations
               noted_markup_location given_markup_location > 0 then best
          else bounded_maximum (Some loc) rest
      in
      let preceding_markup_location, preceding_ocaml_position =
        match bounded_maximum None location_map with
        | Some loc -> loc
        | None -> assert false
      in
      let line, column = given_markup_location in
      let line', column' = preceding_markup_location in
      let ocaml_position =
        let open Lexing in
        if line = line' then
          {preceding_ocaml_position with
            pos_cnum = preceding_ocaml_position.pos_cnum + column - column'}
        else
          {preceding_ocaml_position with
            pos_lnum = preceding_ocaml_position.pos_lnum + line - line';
            pos_bol = 0;
            pos_cnum = column - 1}
      in
      one_width ocaml_position
end
(** Antiquotations
    We replace antiquotations expressions by a dummy token "(tyxmlX)".
    We store a table token to expression to retrieve them after parsing.
*)
module Antiquot = struct
  let fmt_id = Printf.sprintf "(tyxml%i)"
  let regex_id = Re.(seq [ str "(tyxml" ; rep digit ; char ')' ])
  let re_id = Re.compile regex_id
  let make_id =
    let r = ref 0 in
    fun () -> incr r ; fmt_id !r
  module H = Hashtbl.Make(struct
      type t = string
      let hash = Hashtbl.hash
      let equal (x:string) y = x = y
    end)
  let tbl = H.create 17
  let create expr =
    let s = make_id () in
    H.add tbl s expr ;
    s
  let get loc s =
    if H.mem tbl s then H.find tbl s
    else
      Common.error loc
        "Internal error: This expression placeholder is not registered"
  let contains loc s = match Re.exec_opt re_id s with
    | None -> `No
    | Some g ->
      let (i,j) = Re.Group.offset g 0 in
      let is_whole = i = 0 && j = String.length s in
      if is_whole
      then `Whole (get loc s)
      else `Yes (get loc @@ Re.Group.get g 0)
  let assert_no_antiquot ~loc kind (_namespace,s) =
    match contains loc s with
    | `No -> ()
    | `Yes e | `Whole e ->
      Common.error e.pexp_loc
        "OCaml expressions are not accepted as %s names" kind
end
(** Building block to rebuild the output with expressions intertwined. *)
(** Walk the text list to replace placeholders by OCaml expressions when
    appropriate. Use {!make_txt} on the rest. *)
let make_text ~loc ~lang ss =
  let buf = Buffer.create 17 in
  let push_txt buf l =
    let s = Buffer.contents buf in
    Buffer.clear buf ;
    if s = "" then l else Common.value (Common.txt ~loc ~lang s) :: l
  in
  let rec aux ~loc res = function
    | [] -> push_txt buf res
    | `Text s :: t ->
        Buffer.add_string buf s ;
        aux ~loc res t
    | `Delim g :: t ->
      let e = Antiquot.get loc @@ Re.Group.get g 0 in
      aux ~loc (Common.antiquot e :: push_txt buf res) t
  in
  aux ~loc [] @@ Re.split_full Antiquot.re_id @@ String.concat "" ss
let replace_attribute ~loc ((ns,attr_name),value) =
  let attr = (lang_of_ns loc ns, attr_name) in
  Antiquot.assert_no_antiquot ~loc "attribute" attr ;
  match Antiquot.contains loc value with
  | `No -> (attr, Common.value value)
  | `Whole e -> (attr, Common.antiquot e)
  | `Yes _ ->
      Common.error loc
      "Mixing literals and OCaml expressions is not supported in attribute values"
(** Processing *)
(** Takes the ast and transforms it into a Markup.ml char stream.
    The payload [expr] is either a single token, or an application (that is, a list).
    A token is either a string or an antiquotation. Antiquotations are replaced
    by placeholder strings (see {!Antiquot}).
    Each token is equipped with a starting (but no ending) position.
*)
let ast_to_stream expressions =
  let strings =
    expressions |> List.map @@ fun expr ->
    match expr.pexp_desc with
    | Pexp_constant (Pconst_string (s, loc, _)) ->
      (s, loc.loc_start)
    | _ ->
      (Antiquot.create expr, expr.pexp_loc.loc_start)
  in
  let source =
    let items = ref strings in
    let offset = ref 0 in
    let rec next_byte () = match !items with
      | [] -> None
      | (s, _)::rest ->
        if !offset < String.length s then begin
          offset := !offset + 1;
          Some (s.[!offset - 1])
        end
        else begin
          offset := 0;
          items := rest;
          next_byte ()
        end
    in
    Markup.fn next_byte
  in
  source, Loc.make_location_map strings
let context_of_lang = function
  | Common.Svg -> Some (`Fragment "svg")
  | Html -> None
(** Given the payload of a [%html ...] or [%svg ...] expression,
    converts it to a TyXML expression representing the markup
    contained therein. *)
let markup_to_expr lang loc expr =
  let context = context_of_lang lang in
  let input_stream, adjust_location = ast_to_stream expr in
  let report loc error = 
    match error with
    | `Bad_content _ -> ()
    | _ ->
      let loc = adjust_location loc in
      let message =
        Markup.Error.to_string error |> String.capitalize_ascii
      in
      Common.error loc "%s" message
  in
  let parser =
    Markup.parse_html
      ?context
      ~encoding:Markup.Encoding.utf_8
      ~report
      input_stream
  in
  let signals = Markup.signals parser in
  let get_loc () = adjust_location @@ Markup.location parser in
  let rec assemble lang children =
    match Markup.next signals with
    | None | Some `End_element -> List.rev children
    | Some (`Text ss) ->
      let loc = get_loc () in
      let node = make_text ~loc ~lang ss in
      assemble lang (node @ children)
    | Some (`Start_element ((ns, elt_name), attributes)) ->
      let newlang = lang_of_ns loc ns in
      let name = (newlang, elt_name) in
      let loc = get_loc () in
      let sub_children = assemble newlang [] in
      Antiquot.assert_no_antiquot ~loc "element" name ;
      let attributes = List.map (replace_attribute ~loc) attributes in
      let node =
        Element.parse
          ~parent_lang:lang ~loc ~name ~attributes sub_children
      in
      assemble lang (Common.Val node :: children)
    | Some (`Comment s) ->
      let loc = get_loc () in
      let node = Common.value @@ Element.comment ~loc ~lang s in
      assemble lang (node :: children)
    | Some (`Xml _ | `Doctype _ | `PI _)  ->
      assemble lang children
  in
  let l =
    Element_content.filter_surrounding_whitespace @@
    assemble lang []
  in
  match l  with
  | [ Val x | Antiquot x ] -> x
  | l -> Common.list_wrap_value lang loc l
let markup_to_expr_with_implementation lang modname loc expr =
  match modname with
  | Some modname ->
    let current_modname = Common.implementation lang in
    Common.set_implementation lang modname ;
    let res = markup_to_expr lang loc expr in
    Common.set_implementation lang current_modname ;
    res
  | _ ->
    markup_to_expr lang loc expr
let is_capitalized s =
  if String.length s < 0 then false
  else match s.[0] with
    | 'A'..'Z' -> true
    | _ -> false
(** Extract and verify the modname in the annotation [%html.Bar.Baz .. ]. *)
let get_modname = function
  | None -> None
  | Some {txt = longident ; loc} ->
    let l = Longident.flatten_exn longident in
    let s = String.concat "." l in
    if l = [] then None
    else if not (List.for_all is_capitalized l) then
      Common.error loc "This identifier is not a module name"
    else Some s
let application_to_list expr =
  match expr.pexp_desc with
  | Pexp_apply (f, arguments) -> f::(List.map snd arguments)
  | _ -> [expr]
let markup_cases ~lang ~modname cases =
  let f ({pc_rhs} as case) =
    let loc = pc_rhs.pexp_loc in
    let pc_rhs =
      markup_to_expr_with_implementation lang modname loc @@
      application_to_list pc_rhs
    in {case with pc_rhs}
  in
  List.map f cases
let rec markup_function ~lang ~modname e =
  let loc = e.pexp_loc in
  match e.pexp_desc with
  | Pexp_fun (label,def,pat,content) ->
    let content = markup_function ~lang ~modname content in
    {e with pexp_desc = Pexp_fun (label,def,pat,content)}
  | Pexp_function cases ->
    let cases = markup_cases ~lang ~modname cases in
    {e with pexp_desc = Pexp_function cases}
  | _ ->
    markup_to_expr_with_implementation lang modname loc @@
    application_to_list e
let markup_bindings ~lang ~modname l =
  let f ({pvb_expr} as b) =
    let pvb_expr = markup_function ~lang ~modname pvb_expr in
    {b with pvb_expr}
  in
  List.map f l
let expand_expr ~lang ~loc:_ ~path:_ ~arg e _ =
  let modname = get_modname arg in
  match e.pexp_desc with
  | Pexp_let (recflag, bindings, next) ->
    let bindings = markup_bindings ~lang ~modname bindings in
    {e with pexp_desc = Pexp_let (recflag, bindings, next)}
  | _ ->
    markup_to_expr_with_implementation lang modname e.pexp_loc @@
    application_to_list e
  
let expand_str_item ~lang ~loc:_ ~path:_ ~arg recflag value_bindings =
  let bindings =
    markup_bindings ~lang ~modname:(get_modname arg) value_bindings
  in 
  Ppxlib.Ast_helper.Str.value recflag bindings