Source file lang.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
(** *)
module L = Sedlexing.Utf8
let digit = [%sedlex.regexp? '0'..'9']
let alpha = [%sedlex.regexp? 'a'..'z' | 'A'..'Z']
let alphanum = [%sedlex.regexp? digit | alpha]
let script = [%sedlex.regexp? alpha,alpha,alpha,alpha]
let region = [%sedlex.regexp? (alpha,alpha) | (digit,digit,digit)]
let singleton = [%sedlex.regexp? digit
| 0x41 .. 0x57 | 0x59 .. 0x5A | 0x61 .. 0x77 | 0x79 .. 0x7A ]
let privateuse = [%sedlex.regexp? "x", Plus("-",alphanum,alphanum,Opt(alphanum),Opt(alphanum),Opt(alphanum),Opt(alphanum),Opt(alphanum),Opt(alphanum))]
let extension = [%sedlex.regexp?
singleton, Plus("-", alphanum,alphanum,Opt(alphanum),Opt(alphanum),Opt(alphanum),Opt(alphanum),Opt(alphanum),Opt(alphanum))]
let variant = [%sedlex.regexp?
(alphanum,alphanum,alphanum,alphanum,alphanum,Opt(alphanum),Opt(alphanum),Opt(alphanum))
| (digit, alphanum, alphanum, alphanum)
]
let extlang = [%sedlex.regexp?
(alpha,alpha,alpha)
| (("-",alpha,alpha,alpha),Plus("-",alpha,alpha,alpha))
]
let language = [%sedlex.regexp?
(alpha,alpha,Opt(alpha),Opt("-",extlang))
| (alpha,alpha,alpha,alpha),Opt(alpha),Opt(alpha),Opt(alpha),Opt(alpha)]
let langtag = [%sedlex.regexp? language,
Opt("-", script),
Opt('-', region),
Star("-", variant),
Star("-", extension),
Opt("-", privateuse) ]
let irregular = [%sedlex.regexp?
"en-GB-oed" | "i-ami" | "i-bnn" | "i-default"
| "i-enochian" | "i-hak" | "i-klingon" | "i-lux"
| "i-mingo" | "i-navajo" | "i-pwn" | "i-tao"
| "i-tay" | "i-tsu" | "sgn-BE-FR" | "sgn-BE-NL"
| "sgn-CH-DE" ]
let regular = [%sedlex.regexp?
"art-lojban"
| "cel-gaulish"
| "no-bok"
| "no-nyn"
| "zh-guoyu"
| "zh-hakka"
| "zh-min"
| "zh-min-nan"
| "zh-xiang"
]
let grandfathered = [%sedlex.regexp? irregular | regular ]
let language_tag = [%sedlex.regexp? (langtag | privateuse | grandfathered), eof ]
let language_tag lexbuf =
match%sedlex lexbuf with
| language_tag -> ()
| _ -> failwith "Invalid language tag"
let is_valid_language_tag str =
let lexbuf = Sedlexing.Utf8.from_string str in
try language_tag lexbuf; true
with Failure _ -> false