Source file script_parser.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
open Base
module Position = struct
type t = {
cnum : int ;
bol : int ;
lnum : int ;
}
[@@deriving sexp]
let zero = { cnum = 0 ; bol = 0 ; lnum = 0 }
let shift p n = { p with cnum = p.cnum + n }
let newline p =
{ cnum = p.cnum + 1 ;
bol = p.cnum + 1 ;
lnum = p.lnum + 1 }
let translate_lexing_position (q : Lexing.position) ~by:p =
{
q with pos_lnum = p.lnum + q.pos_lnum ;
pos_bol = if p.lnum = 0 then q.pos_bol - 2 else q.pos_cnum + p.bol ;
pos_cnum = p.cnum + q.pos_cnum
}
end
type token = [
| `Text of Position.t * Position.t
| `Antiquotation of Position.t * Position.t
]
[@@deriving sexp]
type lexing_error = [
| `Unmatched_opening_bracket of Position.t
| `Unmatched_closing_bracket of Position.t
]
[@@deriving sexp]
type lexing_result = (token list, lexing_error) Result.t
[@@deriving sexp]
let lexer s : lexing_result =
let n = String.length s in
let opening i =
i < n - 1 && Char.(s.[i] = '{' && s.[i + 1] = '{')
and closing i =
i < n - 1 && Char.(s.[i] = '}' && s.[i + 1] = '}')
in
let classify_current_pos { Position.cnum = i ; _ } =
if i = n then `EOI
else if Char.(s.[i] = '\n') then `Newline
else if opening i then `Opening_bracket
else if closing i then `Closing_bracket
else `Text
in
let add_text_item acc start stop =
if Position.(start.cnum < stop.cnum - 1) then `Text (start, stop) :: acc else acc
in
let rec loop pos state acc =
match classify_current_pos pos, state with
| `EOI, `Quotation p ->
Ok (List.rev (add_text_item acc p pos))
| `EOI, `Antiquotation (bracket_pos, _) ->
Error (`Unmatched_opening_bracket bracket_pos)
| `Opening_bracket, `Quotation p ->
let newpos = Position.shift pos 2 in
loop newpos (`Antiquotation (pos, newpos)) (add_text_item acc p pos)
| `Opening_bracket, `Antiquotation _ ->
loop (Position.shift pos 2) state acc
| `Closing_bracket, `Quotation _ ->
Error (`Unmatched_closing_bracket pos)
| `Closing_bracket, `Antiquotation (_, p) ->
let newpos = Position.shift pos 2 in
loop newpos (`Quotation newpos) (`Antiquotation (p, pos) :: acc)
| `Newline, _ ->
loop (Position.newline pos) state acc
| `Text, _ ->
loop (Position.shift pos 1) state acc
in
loop Position.zero (`Quotation Position.zero) []
let print_lexing_result r =
r
|> sexp_of_lexing_result
|> Sexp.to_string_hum
|> Stdio.print_string
let%expect_test "text only" =
print_lexing_result @@ lexer "rien" ;
[%expect {| (Ok ((Text (((cnum 0) (bol 0) (lnum 0)) ((cnum 4) (bol 0) (lnum 0)))))) |}]
let%expect_test "text + antiquot" =
print_lexing_result @@ lexer "ri{{en}}{{}}";
[%expect {|
(Ok
((Text (((cnum 0) (bol 0) (lnum 0)) ((cnum 2) (bol 0) (lnum 0))))
(Antiquotation (((cnum 4) (bol 0) (lnum 0)) ((cnum 6) (bol 0) (lnum 0))))
(Antiquotation (((cnum 10) (bol 0) (lnum 0)) ((cnum 10) (bol 0) (lnum 0)))))) |}]
let%expect_test "text + antiquot + eol" =
print_lexing_result @@ lexer "ri{{en}}\n{{du \n tout}}";
[%expect {|
(Ok
((Text (((cnum 0) (bol 0) (lnum 0)) ((cnum 2) (bol 0) (lnum 0))))
(Antiquotation (((cnum 4) (bol 0) (lnum 0)) ((cnum 6) (bol 0) (lnum 0))))
(Antiquotation
(((cnum 11) (bol 9) (lnum 1)) ((cnum 20) (bol 15) (lnum 2)))))) |}]