package MlFront_Exec

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file BuildPaths.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
open struct
  let base32_alphabet_lower = "abcdefghijklmnopqrstuvwxyz234567"
end

(** [base32_encode ?no_pad ?lowercase s] encodes the string [s] into an RFC 4648
    base-32 encoding that is suited for case-insensitive filenames while being
    more compact than hex encoding.

    Use the [~no_pad:()] flag to avoid adding the ["="] padding characters,
    which is recommended for cross-platform safe filenames. Padding is
    recommended by RFC 4648 unless explicitly specified (like we are doing
    now!). *)
let base32_encode =
  (* Note: Initially AI assisted, but had bugs with padding. *)
  let alphabet_standard = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" in
  fun ?no_pad ?lowercase input ->
    let alphabet =
      match lowercase with
      | Some () -> base32_alphabet_lower
      | _ -> alphabet_standard
    in
    let len = String.length input in
    let buffer = Buffer.create (((len * 8) + 4) / 5) in
    (* Approximate output size *)
    let rec loop i current_bits current_length =
      if i < len then begin
        let byte = Char.code input.[i] in
        let new_bits = (current_bits lsl 8) lor byte in
        let new_length = current_length + 8 in
        let rec process_full_chunks bits length =
          if length >= 5 then begin
            let five_bits = (bits lsr (length - 5)) land 0x1F in
            Buffer.add_char buffer alphabet.[five_bits];
            process_full_chunks bits (length - 5)
          end
          else (bits, length)
        in
        let remaining_bits, remaining_length =
          process_full_chunks new_bits new_length
        in
        loop (i + 1) remaining_bits remaining_length
      end
      else begin
        if current_length > 0 then begin
          let five_bits = (current_bits lsl (5 - current_length)) land 0x1F in
          Buffer.add_char buffer alphabet.[five_bits]
        end;
        (* Add padding if necessary *)
        if no_pad = None then
          let padding_needed =
            match Buffer.length buffer mod 8 with
            | 0 -> 0
            | buflen -> 8 - buflen
          in
          for _ = 1 to padding_needed do
            Buffer.add_char buffer '='
          done
      end
    in
    loop 0 0 0;
    Buffer.contents buffer

let hex_to_base32_exn =
  let hex_char_to_int c =
    (* Full table *)
    match c with
    | '0' -> 0
    | '1' -> 1
    | '2' -> 2
    | '3' -> 3
    | '4' -> 4
    | '5' -> 5
    | '6' -> 6
    | '7' -> 7
    | '8' -> 8
    | '9' -> 9
    | 'a' -> 10
    | 'b' -> 11
    | 'c' -> 12
    | 'd' -> 13
    | 'e' -> 14
    | 'f' -> 15
    | 'A' -> 10
    | 'B' -> 11
    | 'C' -> 12
    | 'D' -> 13
    | 'E' -> 14
    | 'F' -> 15
    | _ -> invalid_arg "Invalid hexadecimal character"
  in
  let decode_hex_to_bytes hex_string =
    let len = String.length hex_string in
    if len mod 2 <> 0 then
      invalid_arg
        (Printf.sprintf "Supposedly hex string `%s` has odd length" hex_string);
    let result_len = len / 2 in
    let result = Bytes.create result_len in
    for i = 0 to result_len - 1 do
      let h1 = hex_char_to_int hex_string.[i * 2] in
      let h2 = hex_char_to_int hex_string.[(i * 2) + 1] in
      let byte = (h1 lsl 4) lor h2 in
      Bytes.set result i (Char.chr byte)
    done;
    result
  in
  fun ?no_pad ?lowercase hex ->
    let bytes = decode_hex_to_bytes hex in
    base32_encode ?no_pad ?lowercase (String.of_bytes bytes)

let hex_to_base32 ?no_pad ?lowercase hex =
  try Ok (hex_to_base32_exn ?no_pad ?lowercase hex)
  with Invalid_argument msg -> Error msg

(** The length of a path hash created by {!path_hash}.

    A path hash is never acceptable to use for the value store. *)
let path_hash_len = 16

(** [path_hash s] returns the first 16 characters of the base32 encoded SHA256
    hash of [s].

    This is useful for generating short but somewhat unique identifiers for
    temporary directories. Especially for temporary directories on Windows where
    the 260-character path limit applies.

    A path hash is never acceptable to use for the value store. *)
let path_hash s =
  (* ex. dhfyobilb62bbwr3rdpxklbodpno5r32ybjlat7l54y2ncbdz7fq *)
  let base32 =
    base32_encode ~no_pad:() ~lowercase:()
      (Digestif.SHA256.to_raw_string (Digestif.SHA256.digest_string s))
  in
  Stringext.take base32 path_hash_len

(** The lowercase prefix for identifiers in a value store suitable for
    distinguishing object values from other value types on a case-insensitive
    file systems. *)
let prefix_object = "o"

(** The lowercase prefix for identifiers in a value store suitable for
    distinguishing bundle values from other value types on a case-insensitive
    file systems. *)
let prefix_bundle = "b"

(** The lowercase prefix for identifiers in a value store suitable for
    distinguishing asset values from other value types on a case-insensitive
    file systems. *)
let prefix_asset = "a"

(** The lowercase prefix for identifiers in a value store suitable for
    distinguishing constant values from other value types on a case-insensitive
    file systems. *)
let prefix_constant = "c"

(** The lowercase prefix for identifiers in a value store suitable for
    distinguishing values.json file values from other value types on a
    case-insensitive file systems. *)
let prefix_valuesjsonfile = "j"

(** The lowercase prefix for identifiers in a value store suitable for
    distinguishing parsed values values from other value types on a
    case-insensitive file systems. *)
let prefix_values = "v"

(** The lowercase prefix for identifiers in a value store suitable for
    distinguishing source file values (usually for debugging) from other value
    types on a case-insensitive file systems. *)
let prefix_sourcefile = "s"

(** [categorize_value_id id] categorizes the value id [id] by inspecting its
    first character and the remaining base32 encoded string.

    The categorization is case-insensitive.

    The categorization is based on the prefixes defined in this module. If the
    first character does not match any known prefix, [`Unknown] is returned. *)
let categorize_value_id =
  let is prefix s =
    (* no-padding base32 encoding of SHA256 is 52 characters *)
    (String.starts_with ~prefix s
    || String.starts_with ~prefix:(String.uppercase_ascii prefix) s)
    && String.length s = String.length prefix + 52
    (* all 52 characters belong to [base32_alphabet_lower] *)
    && String.for_all
         (String.contains base32_alphabet_lower)
         (String.sub s (String.length prefix) 52)
  in
  fun id ->
    if String.length id < 1 then `Unknown
    else if is prefix_object id then `Object
    else if is prefix_bundle id then `Bundle
    else if is prefix_asset id then `Asset
    else if is prefix_constant id then `Constant
    else if is prefix_valuesjsonfile id then `ValuesFile
    else if is prefix_values id then `Values
    else if is prefix_sourcefile id then `DebugSourceFile
    else `Unknown

let is_value_id s =
  match categorize_value_id s with `Unknown -> false | _ -> true