Source file slug.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
type t = string
type step = Fresh | Separator | Unknown
let regular_case ~separator ~unknown_char buf chars (f, state) =
let () =
match state with
| Fresh -> ()
| Separator -> if f then Buffer.add_char buf separator
| Unknown -> if f then Buffer.add_char buf unknown_char
in
let () = List.iter (Buffer.add_char buf) chars in
(true, Fresh)
let handle_space ~same ~unknown_char buf (f, state) =
match state with
| Fresh -> (f, Separator)
| Separator -> (f, Separator)
| Unknown ->
let () = if not same then Buffer.add_char buf unknown_char in
(f, Separator)
let handle_unknown ~same ~separator buf (f, state) =
match state with
| Fresh -> (f, Unknown)
| Unknown -> (f, Unknown)
| Separator ->
let () = if not same then Buffer.add_char buf separator in
(f, Unknown)
module M = Map.Make (Char)
let default_mapping =
[
('+', "plus")
; ('&', "and")
; ('$', "dollar")
; ('%', "percent")
; ('&', "and")
; ('<', "less")
; ('>', "greater")
; ('|', "or")
; ('@', "at")
; ('#', "hash")
; ('*', "")
; ('(', "")
; (')', "")
; ('[', "")
; (']', "")
; ('}', "")
; ('{', "")
; ('`', "")
]
let s x = x |> String.to_seq |> List.of_seq
let handle_uchar s i =
let decode = String.get_utf_8_uchar s i in
match Uchar.(decode |> utf_decode_uchar |> to_int) with
| 192 | 193 | 194 | 195 | 196 | 197 | 224 | 225 | 226 | 227 | 228 | 229 ->
Some (Some [ 'a' ])
| 200 | 201 | 202 | 203 | 232 | 233 | 234 | 235 -> Some (Some [ 'e' ])
| 204 | 205 | 206 | 207 | 236 | 237 | 238 | 239 -> Some (Some [ 'i' ])
| 210 | 211 | 212 | 213 | 214 | 216 | 240 | 248 | 242 | 243 | 244 | 245 | 246
->
Some (Some [ 'o' ])
| 217 | 218 | 219 | 220 | 249 | 250 | 251 | 252 -> Some (Some [ 'u' ])
| 198 | 230 -> Some (Some [ 'a'; 'e' ])
| 223 -> Some (Some [ 'b' ])
| 199 | 231 -> Some (Some [ 'c' ])
| 208 -> Some (Some [ 'd' ])
| 209 | 241 -> Some (Some [ 'n' ])
| 215 -> Some (Some [ 'x' ])
| 221 | 253 | 255 -> Some (Some [ 'y' ])
| 65533 -> Some None
| _ -> None
let from ?(mapping = default_mapping) ?(separator = '-') ?(unknown_char = '-')
fragment =
let mapping = M.of_list mapping in
let same = Char.equal separator unknown_char in
let reg = regular_case ~separator ~unknown_char in
let space = handle_space ~same ~unknown_char in
let unkn = handle_unknown ~same ~separator in
let fragment = fragment |> String.trim |> String.lowercase_ascii in
let buf = Buffer.create @@ String.length fragment in
let _ =
fragment
|> String.fold_left
(fun (state, i) -> function
| ('0' .. '9' | 'a' .. 'z') as l -> (reg buf [ l ] state, succ i)
| ' ' | '\t' | '\n' | '-' | '_' | '.' | ',' | ';' ->
(space buf state, succ i)
| c -> (
match M.find_opt c mapping with
| None -> (
match handle_uchar fragment i with
| Some (Some x) -> (reg buf x state, succ i)
| Some None -> (state, succ i)
| None -> (unkn buf state, succ i))
| Some "" -> (state, succ i)
| Some r ->
(state |> space buf |> reg buf (s r) |> space buf, succ i)))
((false, Fresh), 0)
in
buf |> Buffer.contents
let validate_from_str separator unknown_char =
String.for_all (function
| '0' .. '9' | 'a' .. 'z' -> true
| chr -> Char.equal chr separator || Char.equal chr unknown_char)
let validate_string ?(separator = '-') ?(unknown_char = '-') =
Data.Validation.where ~pp:Format.pp_print_string
~message:(fun x -> x ^ " is not a valid slug")
(validate_from_str separator unknown_char)
let validate ?separator ?unknown_char =
let open Data.Validation in
string & validate_string ?separator ?unknown_char