package SZXX
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file parsing.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221open! Base open Angstrom let ( .*{} ) bs pos = Bigstringaf.unsafe_get bs pos |> Char.to_int let return_true = return true let return_false = return false let return_none = return None let return_unit = return () let return_nil = return [] let fail_pattern = fail "pattern" let fail_backtrack = fail "backtrack" (** More efficient than [Angstrom.skip_many]. See https://github.com/inhabitedtype/angstrom/pull/219 *) let skip_many p = fix (fun m -> p >>= (fun _ -> return_true) <|> return_false >>= function | true -> m | false -> return_unit ) (* Runs [p] until it returns a [Some], then return that *) let skip_find p = fix (fun m -> let found = ref None in p >>= (function | None -> return_true | Some _ as x -> found := x; return_false) <|> return_false >>= function | true -> m | false -> return !found ) (* Same as [skip_find] but backtracks to the position right before it received [Some] *) let skip_find_backtrack p = fix (fun m -> let found = ref None in p >>= (function | None -> return_true | Some _ as x -> found := x; fail_backtrack) <|> return_false >>= function | true -> m | false -> return !found ) let skip_n_times n p = if n <= 0 then return_unit else fix (fun m -> let count = ref 0 in let* _ = p in Int.incr count; if !count < n then m else return_unit ) (* Faster version of Angstrom.count *) let count n p = if n <= 0 then return_nil else ( let rec loop = function | 0 -> return_nil | n -> lift2 List.cons p (loop (n - 1)) in loop n ) let peek_with len f = Unsafe.peek len f <|> (advance len *> fail_backtrack <|> return_unit) *> Unsafe.peek len f let equal_sub buf1 ~pos1 buf2 ~pos2 ~len = let i = ref 0 in while !i < len && buf1.*{pos1 + !i} = buf2.*{pos2 + !i} do Int.incr i done; !i = len let string s = let len = String.length s in let bs = Bigstringaf.of_string s ~off:0 ~len in Unsafe.take len (fun buf ~off ~len -> if equal_sub buf ~pos1:off bs ~pos2:0 ~len then return_unit else fail_pattern ) >>= Fn.id let bigstring bs = Unsafe.take (Bigstringaf.length bs) (fun buf ~off ~len -> if equal_sub buf ~pos1:off bs ~pos2:0 ~len then return_unit else fail_pattern ) >>= Fn.id let drop p = p *> return_unit (** Fastest benchmarked way to do this. Inspired by [Cstruct.ffind_sub] *) let sub_index ~pattern ~patlen bs ~off ~len = if patlen > len then None else ( let max_zidx_sub = patlen - 1 in let max_zidx_s = off + len - patlen in let rec loop i k = if i > max_zidx_s then None else if k > max_zidx_sub then Some (i - off) else if k > 0 then if pattern.*{k} = bs.*{i + k} then loop i (k + 1) else loop (i + 1) 0 else if pattern.*{0} = bs.*{i} then loop i 1 else loop (i + 1) 0 in loop off 0 ) module Storage = struct type t = { add: Bigstringaf.t -> off:int -> len:int -> unit; finalize: unit -> unit; commit: unit Angstrom.t; } let noop_backtrack = { add = (fun _ ~off:_ ~len:_ -> ()); finalize = (fun () -> ()); commit = return_unit } let noop = { add = (fun _ ~off:_ ~len:_ -> ()); finalize = (fun () -> ()); commit } end (* Boyer–Moore–Horspool algorithm *) module BMH = struct type t = | Found | Shift of int | Restart let make_table ~pattern ~patlen = let table = Array.create ~len:256 Restart in for i = 0 to patlen - 2 do table.(pattern.*{i}) <- Shift (patlen - (i + 1)) done; table let run ~pattern ~patlen table bs = let rec loop = function | -1 -> Found | i when bs.*{i} = pattern.*{i} -> (loop [@tailcall]) (i - 1) | _ -> table.(bs.*{patlen - 1}) in loop (patlen - 1) end let bounded_file_reader ~slice_size ~pattern Storage.{ add; finalize; commit } = let patlen = String.length pattern in let pattern = Bigstringaf.of_string pattern ~off:0 ~len:patlen in let rec slow_path table window = match BMH.run ~pattern ~patlen table window with | Found -> finalize (); commit | Shift by -> add ~off:0 ~len:by window; let diff = patlen - by in Bigstringaf.unsafe_blit window ~src_off:by window ~dst_off:0 ~len:diff; let* () = Unsafe.take by (fun buf ~off ~len -> Bigstringaf.unsafe_blit buf ~src_off:off window ~dst_off:diff ~len ) in (slow_path [@tailcall]) table window | Restart -> add ~off:0 ~len:patlen window; let* () = Unsafe.take patlen (fun buf ~off ~len -> Bigstringaf.unsafe_blit buf ~src_off:off window ~dst_off:0 ~len ) in (slow_path [@tailcall]) table window in let fast_path = peek_with (slice_size + patlen) (sub_index ~pattern ~patlen) >>= function | Some _ as x -> return x | None -> Unsafe.take slice_size add *> commit *> return_none in skip_find_backtrack fast_path >>= function | None -> (* This branch is hit when the fast path fails due to the remaining length being less than the slice_size. That's why the slow_path cannot be using a large Unsafe.peek/take: we don't know how much is left. *) take_bigstring patlen >>= slow_path (BMH.make_table ~pattern ~patlen) | Some at -> ((if at > 0 then Unsafe.take at add else return_unit) >>| finalize) *> commit *> bigstring pattern let take_until_pattern ~slice_size ~pattern = let storage buf partial staged = Storage. { add = (fun bs ~off ~len -> if !staged + len > slice_size then ( Buffer.add_subbytes buf partial ~pos:0 ~len:!staged; staged := 0 ); Bigstringaf.unsafe_blit_to_bytes bs ~src_off:off partial ~dst_off:!staged ~len; staged := !staged + len); finalize = (fun () -> if !staged > 0 then Buffer.add_subbytes buf partial ~pos:0 ~len:!staged); commit = return_unit; } in let* () = return_unit in let buf = Buffer.create 32 in let partial = Bytes.create slice_size in let staged = ref 0 in let+ () = bounded_file_reader ~slice_size ~pattern (storage buf partial staged) in Buffer.contents buf