package ppx_nanocaml

  1. Overview
  2. Docs

Source file lang.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
open Batteries
open Ast

(** a type recognized by nanopass; usually a part of a production.
    e.g. [string], [stmt], [(string * expr) list] **)
type np_type =
  | NP_term of core_type      (** external types are "terminals" **)
  | NP_nonterm of string      (** named nonterminal **)
  | NP_tuple of np_type list  (** [t * u * ...] **)
  | NP_list of np_type        (** [t list] **)

(** a production is one of the forms in a nonterminal -- essentially
    just a variant, e.g. [`Var], [`App]. **)
type np_production =
  { nppr_name : string
  ; nppr_arg : np_type option }

(** a nonterminal is a type defined by a nanopass language, e.g.
    [expr], [stmt]. **)
type np_nonterm =
  { npnt_loc : Location.t
  ; npnt_name : string
  ; npnt_productions : np_production list }

(** a nanopass language, e.g. L0, L1 (as traditionally named) **)
type np_language =
  { npl_loc : Location.t
  ; npl_name : string
  ; npl_nonterms : np_nonterm list (* TODO: hash tbl? *)
  }


let rec string_of_type = function
  | NP_term core_type ->
    begin match core_type.ptyp_desc with
      | Ptyp_constr ({txt = l}, _) -> "<core> (" ^ List.last (Longident.flatten l) ^ ")"
      | _ -> "<core>"
    end
  | NP_nonterm s -> s
  | NP_tuple t -> "(" ^ String.concat "," (List.map string_of_type t) ^ ")"
  | NP_list t -> "[" ^ string_of_type t ^ "]"

(** global table of all defined languages. **)
(* TODO: nv wants to make this into a real database,
     which would allow caching, cross-file nanopass, etc. *)
let languages : (string, np_language) Hashtbl.t
  = Hashtbl.create 30

(** globally registers the given language. raises
    [Location.Error] if a language with the same
    name is already defined. **)
let add_language lang =
  if Hashtbl.mem languages lang.npl_name then
    Location.raise_errorf ~loc:lang.npl_loc
      "language %S defined already"
      lang.npl_name
  else
    Hashtbl.add languages lang.npl_name lang

(** returns the language with the given name. raises
    [Not_found] if no such language has been defined. **)
let find_language ?(exn=Not_found) name =
  Option.get_exn
    (Hashtbl.find_option languages name)
    exn

(** [language_nonterm l name] returns the nonterminal
    in language [l] with the given name. raises [Not_found]
    if no such nonterminal. *)
let language_nonterm ?(exn=Not_found) lang name =
  List.find_exn
    (fun nt -> nt.npnt_name = name)
    exn lang.npl_nonterms



(** convert [core_type] into nanopass type. **)
let type_of_core_type ~nt_names t =
  let rec cvt ptyp =
    match ptyp.ptyp_desc with
    (* nonterminal: *)
    | Ptyp_constr ({txt = Longident.Lident name}, [])
      when List.mem name nt_names ->
      NP_nonterm name
    (* tuple: *)
    | Ptyp_tuple typs ->
      let npts = List.map cvt typs in
      NP_tuple npts
    (* list: *)
    | Ptyp_constr ({txt = Longident.Lident "list"}, [elem]) ->
      NP_list (cvt elem)
    (* otherwise, it's a terminal: *)
    | _ ->
      NP_term ptyp
  in
  cvt t

(** convert [row_field] (from polymorphic variant) into nanopass production **)
let production_of_row_field ~nt_names =
  function
  | Rtag (name, _, _, args) ->
     {nppr_name = name;
      nppr_arg = match args with
                 | [t] -> Some (type_of_core_type ~nt_names t)
                 | _ -> None}

  | Rinherit {ptyp_loc = loc} ->
     Location.raise_errorf ~loc
       "invalid nanopass production form"

(** convert [type_declaration] into nanopass nonterminal **)
let nonterm_of_type_decl ?extending ~nt_names = function
  (* type nt = [ `A | `B ... ] *)
  | {ptype_name = {txt = name};
     ptype_loc = loc;
     ptype_params = [];
     ptype_kind = Ptype_abstract;
     ptype_manifest = Some {ptyp_desc = Ptyp_variant (rows, Closed, _)}}
    ->
     let prods = List.map (production_of_row_field ~nt_names) rows in
     {npnt_loc = loc;
      npnt_name = name;
      npnt_productions = prods}

  (* type nt = { add : [ `A ... ] ; del : [ `B ... ] } *)
  | {ptype_name = {txt = name};
     ptype_loc = loc;
     ptype_params = [];
     ptype_kind = Ptype_record decls}
    ->
     let lang =
       Option.get_exn extending
         (Location.Error
            (Location.errorf ~loc "must be extending a language to use this form"))
     in
     let old_nontem =
       language_nonterm lang name
         ~exn:(Location.Error
                 (Location.errorf ~loc "no such nonterminal %S in language %S"
                    name lang.npl_name))
     in

     (* get the 'lname' label out of the record, and parse
        the productions contained in the type. *)
     let get_prods lname =
       match List.find_opt
               (fun {pld_name = {txt = x}} -> x = lname)
               decls
       with
       | None -> None
       | Some {pld_type = {ptyp_desc = Ptyp_variant (rows, Closed, _)}} ->
          Some (List.map (production_of_row_field ~nt_names) rows)
       | Some _ ->
          Location.raise_errorf ~loc
            "invalid extended production"
     in

     (* create functions for adding productions / deleting productions
        if the 'add' or 'del' labels are omitted, then nothing is added / removed. *)
     let add =
       Option.map_default
         (fun add_prs -> List.append add_prs)
         identity (* do nothing when [None] *)
         (get_prods "add")
     in
     let del =
       Option.map_default
         (fun del_prs ->
           let keep p = List.for_all (fun p' -> p.nppr_name <> p'.nppr_name) del_prs in
           List.filter keep)
         identity
         (get_prods "del")
     in

     let prods = old_nontem.npnt_productions |> del |> add in
     {npnt_loc = loc;
      npnt_name = name;
      npnt_productions = prods}

  (* invalid nonterminal *)
  | {ptype_loc = loc} ->
     Location.raise_errorf ~loc
       "invalid nanopass type declaration form"

(** convert [module_binding] into nanopass language **)
let language_of_module = function
  (* module L = struct type nt = ... end *)
  (* must be one single recursive type decl *)
  | {pmb_name = {txt = lang_name};
     pmb_loc = loc;
     pmb_expr =
       {pmod_desc =
          Pmod_structure
            [ {pstr_desc = Pstr_type (Recursive, type_decls)} ]}}
    ->
     let nt_names = List.map (fun {ptype_name = {txt}} -> txt) type_decls in
     let nonterms = List.map (nonterm_of_type_decl ~nt_names) type_decls in
     {npl_loc = loc;
      npl_name = lang_name;
      npl_nonterms = nonterms}

  (* module L = struct
       include L'
       type nt = ...
     end *)
  (* must be a single include + a single recursive type decl*)
  | {pmb_name = {txt = lang_name};
     pmb_loc = loc;
     pmb_expr =
       {pmod_desc =
          Pmod_structure
            [ {pstr_desc =
                 Pstr_include
                   {pincl_mod =
                      {pmod_desc =
                         Pmod_ident {txt = Lident ext_lang_name}}}};
              {pstr_desc = Pstr_type (Recursive, type_decls)} ]}}
    ->
     (* the language we are extending *)
     let ext_lang =
       find_language ext_lang_name
         ~exn:(Location.Error
                 (Location.errorf ~loc
                    "language %S has not been defined" ext_lang_name))
     in

     (* new nonterminal names *)
     let nt_names = List.map (fun {ptype_name = {txt}} -> txt) type_decls in
     (* old nonterminal names *)
     let nt_names' = List.map (fun {npnt_name} -> npnt_name) ext_lang.npl_nonterms in
     (* new nonterminals *)
     let nonterms =
       List.map (nonterm_of_type_decl
                   ~extending:ext_lang
                   ~nt_names:(nt_names @ nt_names'))
         type_decls
     in
     (* old nonterminals (only the unmodified ones) *)
     let nonterms' =
       List.filter_map (fun name ->
           if List.mem name nt_names then
             None
           else
             Some (language_nonterm ext_lang name))
         nt_names'
     in

     {npl_loc = loc;
      npl_name = lang_name;
      npl_nonterms = nonterms @ nonterms'}

  | {pmb_loc = loc} ->
     Location.raise_errorf ~loc
       "invalid nanopass language form"
OCaml

Innovation. Community. Security.