package mehari

  1. Overview
  2. Docs

Source file gemtext.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
type t = line list

and line =
  | Text of string
  | Link of { url : string; name : string option }
  | Preformat of preformat
  | Heading of [ `H1 | `H2 | `H3 ] * string
  | ListItem of string
  | Quote of string

and preformat = { alt : string option; text : string }

let text t = Text t
let newline = Text ""
let link ?name url = Link { url; name }
let preformat ?alt text = Preformat { alt; text }
let heading h text = Heading (h, text)
let list_item text = ListItem text
let quote text = Quote text

let to_string lines =
  List.map
    (function
      | Text t -> t
      | Link { url; name } ->
          Option.fold name ~none:"" ~some:(Printf.sprintf " %s")
          |> Printf.sprintf "=> %s%s" url
      | Preformat { alt; text } ->
          Printf.sprintf "```%s\n%s\n```" (Option.value alt ~default:"") text
      | Heading (`H1, text) -> Printf.sprintf "# %s" text
      | Heading (`H2, text) -> Printf.sprintf "## %s" text
      | Heading (`H3, text) -> Printf.sprintf "### %s" text
      | ListItem text -> Printf.sprintf "* %s" text
      | Quote text -> Printf.sprintf ">%s" text)
    lines
  |> String.concat "\n"

module Regex = struct
  let spaces = Re.(rep (alt [ char ' '; char '\t' ]))

  let line prefix =
    Re.compile Re.(seq [ bol; prefix; spaces; group (rep1 any) ])

  let h1 = line (Re.char '#')
  let h2 = line (Re.str "##")
  let h3 = line (Re.str "###")
  let item = line (Re.str "* ")
  let quote = Re.compile Re.(seq [ bol; Re.char '>'; group (rep1 any) ])

  let link =
    Re.compile
      Re.(
        seq
          [
            str "=>";
            spaces;
            group (rep1 (compl [ space ]));
            opt (seq [ spaces; group (rep1 any) ]);
          ])
end

type line_feed = LF | CRLF | EOF

(* Preserve line feed information to not erase it in pre-formatted blocks. *)
let show = function LF -> "\n" | CRLF -> "\r\n" | EOF -> ""

let split_lines text =
  let buf = Buffer.create 8192 in
  let acc = ref [] in
  let cr = ref false in
  for i = 0 to String.length text - 1 do
    match String.unsafe_get text i with
    | '\r' -> cr := true
    | '\n' when !cr ->
        let content = Buffer.contents buf in
        Buffer.reset buf;
        cr := false;
        acc := (content, CRLF) :: !acc
    | '\n' ->
        let content = Buffer.contents buf in
        Buffer.reset buf;
        acc := (content, LF) :: !acc
    | c when !cr ->
        cr := false;
        Buffer.add_char buf '\r';
        Buffer.add_char buf c
    | c -> Buffer.add_char buf c
  done;
  if !cr then Buffer.add_char buf '\r';
  acc := (Buffer.contents buf, EOF) :: !acc;
  List.rev !acc

let of_string text =
  let buf = Buffer.create 4096 in
  let rec loop acc is_preformat alt = function
    | [] -> List.rev acc
    | (l, feed) :: ls -> (
        match (String.starts_with ~prefix:"```" l, is_preformat) with
        | true, true ->
            let text = Buffer.contents buf in
            Buffer.reset buf;
            let alt =
              match String.sub l 3 (String.length l - 3) with
              | "" -> None
              | alt -> Some alt
            in
            loop (Preformat { alt; text } :: acc) false None ls
        | true, false -> loop acc true alt ls
        | false, true ->
            Buffer.add_string buf l;
            Buffer.add_string buf (show feed);
            loop acc is_preformat alt ls
        | false, false ->
            let frgmt =
              if l = "" then Text ""
              else
                match Re.exec_opt Regex.h3 l with
                | Some grp -> Heading (`H3, Re.Group.get grp 1)
                | None -> (
                    match Re.exec_opt Regex.h2 l with
                    | Some grp -> Heading (`H2, Re.Group.get grp 1)
                    | None -> (
                        match Re.exec_opt Regex.h1 l with
                        | Some grp -> Heading (`H1, Re.Group.get grp 1)
                        | None -> (
                            match Re.exec_opt Regex.item l with
                            | Some grp -> ListItem (Re.Group.get grp 1)
                            | None -> (
                                match Re.exec_opt Regex.quote l with
                                | Some grp -> Quote (Re.Group.get grp 1)
                                | None -> (
                                    match Re.exec_opt Regex.link l with
                                    | None -> Text l
                                    | Some grp ->
                                        let url, name =
                                          ( Re.Group.get grp 1,
                                            Re.Group.get_opt grp 2 )
                                        in
                                        Link { url; name })))))
            in
            loop (frgmt :: acc) is_preformat alt ls)
  in
  split_lines text |> loop [] false None

let paragraph gemtext str =
  let doc = ref [] in
  let cr = ref false in
  let buf = Buffer.create 4096 in
  for i = 0 to String.length str - 1 do
    match String.unsafe_get str i with
    | '\r' -> cr := true
    | '\n' when !cr ->
        let line = Buffer.contents buf in
        Buffer.reset buf;
        doc := gemtext line :: !doc;
        cr := false
    | '\n' ->
        let line = Buffer.contents buf in
        Buffer.reset buf;
        doc := gemtext line :: !doc;
        cr := false
    | c ->
        if !cr then Buffer.add_char buf '\r';
        Buffer.add_char buf c;
        cr := false
  done;
  (match Buffer.contents buf with "" -> !doc | line -> gemtext line :: !doc)
  |> List.rev

let pp fmt g = Format.fprintf fmt "%s" (to_string g)