package query-json

  1. Overview
  2. Docs

Source file Tokenizer.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
open Sedlexing.Utf8

let digit = [%sedlex.regexp? '0' .. '9']
let number = [%sedlex.regexp? Plus digit, Opt '.', Opt (Plus digit)]
let space = [%sedlex.regexp? Plus ('\n' | '\t' | ' ')]

let identifier =
  [%sedlex.regexp? (alphabetic | '_'), Star (alphabetic | digit | '_')]

let not_double_quotes = [%sedlex.regexp? Compl '"']

type token =
  | NUMBER of float
  | STRING of string
  | BOOL of bool
  | IDENTIFIER of string
  | FUNCTION of string
  | OPEN_PARENT
  | CLOSE_PARENT
  | OPEN_BRACKET
  | CLOSE_BRACKET
  | OPEN_BRACE
  | CLOSE_BRACE
  | SEMICOLON
  | COLON
  | DOT
  | RECURSE
  | PIPE
  | QUESTION_MARK
  | COMMA
  | NULL
  | ADD
  | SUB
  | DIV
  | MULT
  | MODULO
  | AND
  | OR
  | EQUAL
  | NOT_EQUAL
  | GREATER
  | LOWER
  | GREATER_EQUAL
  | LOWER_EQUAL
  | RANGE
  | FLATTEN
  | IF
  | THEN
  | ELSE
  | ELIF
  | END
  | EOF
[@@deriving show]

let tokenize_string buf =
  let buffer = Buffer.create 10 in
  let rec read_string buf =
    [%sedlex
      match buf with
      | {|\"|} ->
          Buffer.add_char buffer '"';
          read_string buf
      | '"' -> Ok (STRING (Buffer.contents buffer))
      | not_double_quotes ->
          Buffer.add_string buffer (lexeme buf);
          read_string buf
      | _ -> Error "unmatched string"]
  in
  read_string buf

let tokenize_apply buf =
  let identifier = lexeme buf in
  match%sedlex buf with
  | '(' -> Ok (FUNCTION identifier)
  | _ -> Ok (IDENTIFIER identifier)

let rec tokenize buf =
  match%sedlex buf with
  | eof -> Ok EOF
  | '<' -> Ok LOWER
  | "<=" -> Ok LOWER_EQUAL
  | '>' -> Ok GREATER
  | ">=" -> Ok GREATER_EQUAL
  | "==" -> Ok EQUAL
  | "!=" -> Ok NOT_EQUAL
  | "+" -> Ok ADD
  | "and" -> Ok AND
  | "or" -> Ok OR
  | "-" -> Ok SUB
  | "*" -> Ok MULT
  | "/" -> Ok DIV
  | "%" -> Ok MODULO
  | "[" -> Ok OPEN_BRACKET
  | "]" -> Ok CLOSE_BRACKET
  | "{" -> Ok OPEN_BRACE
  | "}" -> Ok CLOSE_BRACE
  | "|" -> Ok PIPE
  | ";" -> Ok SEMICOLON
  | ":" -> Ok COLON
  | "," -> Ok COMMA
  | "?" -> Ok QUESTION_MARK
  | "null" -> Ok NULL
  | "true" -> Ok (BOOL true)
  | "false" -> Ok (BOOL false)
  | "(" -> Ok OPEN_PARENT
  | ")" -> Ok CLOSE_PARENT
  | "range" -> Ok RANGE
  | "flatten" -> Ok FLATTEN
  | "if" -> Ok IF
  | "then" -> Ok THEN
  | "else" -> Ok ELSE
  | "elif" -> Ok ELIF
  | "end" -> Ok END
  | "." -> Ok DOT
  | ".." -> Ok RECURSE
  | '"' -> tokenize_string buf
  | identifier -> tokenize_apply buf
  | number ->
      let num = lexeme buf in
      Ok (NUMBER (Float.of_string num))
  | space -> tokenize buf
  | any -> Error ("Unexpected character '" ^ lexeme buf ^ "'")
  | _ -> Error "Unexpected character"