package hilite

  1. Overview
  2. Docs
Build time syntax highlighting

Install

dune-project
 Dependency

Authors

Maintainers

Sources

hilite-0.5.0.tbz
sha256=550c01abe4a95808553693499dcb75ef87bd86127db8f3db1f94a81689e13a15
sha512=b42375e1dd288fc3795c570be2b94486aa91c499c5b6a2cff936d530d6864b57f0269deb73a7f333339e41e89b4cd0452655ff7c40f58360cea13efaee645115

doc/src/hilite/hilite.ml.html

Source file hilite.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
type error = [ `Unknown_lang of string ]

let langs = [ "ocaml"; "dune"; "opam"; "sh"; "shell"; "diff"; "bash" ]

let filteri p l =
  let rec aux i acc = function
    | [] -> List.rev acc
    | x :: l -> aux (i + 1) (if p i x then x :: acc else acc) l
  in
  aux 0 [] l

(* The following HTML escaping logic is borrowed from Cmarkit_html.
   
   Copyright (c) 2021 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC *)

let buffer_add_html_escaped_string b s =
  let string = Buffer.add_string in
  let len = String.length s in
  let max_idx = len - 1 in
  let flush b start i =
    if start < len then Buffer.add_substring b s start (i - start)
  in
  let rec loop start i =
    if i > max_idx then flush b start i
    else
      let next = i + 1 in
      match String.get s i with
      | '\x00' ->
          flush b start i;
          Buffer.add_utf_8_uchar b Uchar.rep;
          loop next next
      | '&' ->
          flush b start i;
          string b "&amp;";
          loop next next
      | '<' ->
          flush b start i;
          string b "&lt;";
          loop next next
      | '>' ->
          flush b start i;
          string b "&gt;";
          loop next next
      (*    | '\'' -> flush c start i; string c "&apos;"; loop next next *)
      | '\"' ->
          flush b start i;
          string b "&quot;";
          loop next next
      | _c -> loop start next
  in
  loop 0 0

let span ?(escape = true) class_gen t =
  let drop_last lst =
    let l = List.length lst in
    filteri (fun i _ -> i < l - 1) lst
  in
  let span_gen c s =
    let s =
      let buf = Buffer.create 128 in
      if escape then buffer_add_html_escaped_string buf s
      else Buffer.add_string buf s;
      Buffer.contents buf
    in
    (class_gen c, s)
  in
  span_gen (String.concat "-" (drop_last t))

let mk_block ?escape lang =
  List.map
    (List.map (fun (scope, str) ->
         (span ?escape (fun c -> lang ^ "-" ^ c) scope) str))

let rec highlight_tokens i spans line = function
  | [] -> List.rev spans
  | tok :: toks ->
      let j = TmLanguage.ending tok in
      assert (j > i);
      let text = String.sub line i (j - i) in
      let scope =
        match TmLanguage.scopes tok with
        | [] -> []
        | scope :: _ -> String.split_on_char '.' scope
      in
      highlight_tokens j ((scope, text) :: spans) line toks

let highlight_string t grammar stack str =
  let lines = String.split_on_char '\n' str in
  let rec loop stack acc = function
    | [] -> List.rev acc
    | line :: lines ->
        (* Some patterns don't work if there isn't a newline *)
        let line = line ^ "\n" in
        let tokens, stack = TmLanguage.tokenize_exn t grammar stack line in
        let spans = highlight_tokens 0 [] line tokens in
        loop stack (spans :: acc) lines
  in
  loop stack [] lines

let add_name name = function
  | `Assoc assoc -> `Assoc (("name", `String name) :: assoc)
  | _ -> failwith "Failed to add name, object not given"

let lang_to_plists s =
  match String.lowercase_ascii s with
  | "ocaml" -> [ Jsons.ocaml_interface; Jsons.ocaml ]
  | "dune" -> [ Jsons.dune ]
  | "opam" -> [ Jsons.opam ]
  | "sh" -> [ Jsons.shell |> add_name "sh" ]
  | "shell" -> [ Jsons.shell |> add_name "shell" ]
  | "bash" -> [ Jsons.shell |> add_name "bash" ]
  | "diff" -> [ Jsons.diff |> add_name "diff" ]
  | _ -> []

type tm_lookup_method = [ `Name | `Scope_name | `Filetype ]

let find_grammar_fun = function
  | `Name -> TmLanguage.find_by_name
  | `Scope_name -> TmLanguage.find_by_scope_name
  | `Filetype -> TmLanguage.find_by_filetype

let src_code_to_pairs ?escape ?(lookup_method = `Name) ?tm ~lang src =
  let t =
    match tm with
    | Some tm -> tm
    | None ->
        let t = TmLanguage.create () in
        let plist = lang_to_plists lang in
        let grammars = List.map TmLanguage.of_yojson_exn plist in
        List.iter (TmLanguage.add_grammar t) grammars;
        t
  in
  match (find_grammar_fun lookup_method) t lang with
  | None -> Error (`Unknown_lang lang)
  | Some grammar ->
      Ok
        (highlight_string t grammar TmLanguage.empty src
        |> mk_block ?escape lang)

let src_code_to_html ?escape ?lookup_method ?tm ~lang src =
  let pair_to_span (class_, content) =
    "<span class='" ^ class_ ^ "'>" ^ content ^ "</span>"
  in
  src_code_to_pairs ?escape ?lookup_method ?tm ~lang src |> function
  | Ok pairs ->
      Ok
        ("<pre><code>"
        ^ (String.concat "" @@ List.map pair_to_span @@ List.concat pairs)
        ^ "</code></pre>")
  | Error _ as e -> e

module Grammars = struct
  include Jsons
end