package bencode

  1. Overview
  2. Docs

Source file bencode_token.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
(** {1 Streaming Tokenization for Bencode} *)

(** A single Bencode token. A Bencode value is represented as a flat stream
    of such tokens; containers are delimited by an opening token and a
    closing [`End]. *)
type t =
  [
  | `I of int64
    (** an integer; written as ['i'], the decimal digits, then ['e'] *)
  | `S of string
    (** a byte string; written as its length, [':'], then the raw bytes *)
  | `BeginDict
    (** opens a dictionary; written as ['d'] *)
  | `BeginList
    (** opens a list; written as ['l'] *)
  | `End
    (** closing marker; written as ['e'] *)
  ]

(** Alias of {!t}, so that the token type can still be named inside
    submodules that define their own [t]. *)
type token = t

(** An iterator over values of type ['a]: a function that is given a
    callback and calls it on each element. *)
type 'a sequence = ('a -> unit) -> unit

(** Human-readable rendering of a token. This is {b not} the Bencode
    encoding: an integer is printed as its bare decimal digits and a string
    is returned as is, without any length prefix. The structural tokens are
    rendered as their one-letter marker. *)
let to_string tok =
  match tok with
  | `BeginDict -> "d"
  | `BeginList -> "l"
  | `End -> "e"
  | `S payload -> payload
  | `I n -> Int64.to_string n

module Encode = struct
  (** An encoder is a pair of output callbacks: one that emits a whole
      string and one that emits a single character. *)
  type t = {
    str : string -> unit;
    chr : char -> unit;
  }

  (** Encoder that appends everything it is given to the buffer [b]. *)
  let to_buf b =
    let str s = Buffer.add_string b s
    and chr c = Buffer.add_char b c in
    { str; chr }

  (** Encoder that writes everything it is given to the channel [oc]. *)
  let to_chan oc =
    let str s = output_string oc s
    and chr c = output_char oc c in
    { str; chr }

  (** [put enc tok] emits the Bencode representation of the single token
      [tok] through [enc]. *)
  let put enc (tok:token) =
    let { str; chr } = enc in
    match tok with
    | `BeginDict -> chr 'd'
    | `BeginList -> chr 'l'
    | `End -> chr 'e'
    | `S payload ->
        (* length prefix, separator, then the raw bytes *)
        let size = String.length payload in
        str (string_of_int size);
        chr ':';
        str payload
    | `I n ->
        (* decimal digits wrapped between 'i' and 'e' *)
        chr 'i';
        str (Int64.to_string n);
        chr 'e'

  (** [put_many enc seq] emits every token produced by the iterator [seq],
      in order. *)
  let put_many enc seq =
    seq (fun tok -> put enc tok)
end

module Decode = struct
  (** Outcome of one call to {!next}. *)
  type result =
    | Next of token (** a complete token was decoded *)
    | End (** the input ended cleanly, between two tokens *)
    | Error of string (** decoding failed; the decoder stays in error *)
    | Await (** more input, for non blocking-IO *)

  (* reading an integer: which sign does it have? *)
  type int_read_state =
    | PosInt    (* positive int *)
    | NegInt    (* negative int *)
    | AnyInt    (* don't know yet which kind of integer *)
    | ZeroInt   (* only 0 *)

  (* what the decoder is in the middle of reading *)
  type state =
    | Start                      (* between two tokens *)
    | ReadInt of int_read_state  (* after 'i', reading the digits *)
    | ReadStringLen              (* reading the length prefix of a string *)
    | ReadString                 (* reading the payload of a string *)
    | StateError of string       (* failed; every later [next] returns it *)

  (* what a [refill] callback reports *)
  type refill_result =
    | Refill_eof              (* no more input, ever *)
    | Refill_read of int      (* that many bytes were added to the buffer *)
    | Refill_error of string  (* reading failed *)
    | Refill_await            (* nothing now; the caller must feed more *)

  type t = {
    mutable cur_i : int64;  (* when reading int, or string length *)
    mutable cur_s : Bytes.t; (* when reading string *)
    mutable buf : Bytes.t; (* input buffer *)
    mutable i : int;   (* offset in [buf] of the first unread byte *)
    mutable len : int; (* number of unread bytes, starting at [i] *)
    mutable state : state;
    mutable refill : unit -> refill_result;
  }

  (* refill callback for inputs that can never grow *)
  let _refill_stop () = Refill_eof

  (* template record; always copied with [{ _default with ... }] *)
  let _default = {
    cur_i = 0_L;
    cur_s = Bytes.empty;
    buf = Bytes.empty;
    i = 0;
    len = 0;
    state = Start;
    refill = _refill_stop;
  }

  (** Decoder reading from a copy of the whole string [s]. *)
  let of_string s = {
    _default with
    buf = Bytes.of_string s;
    len = String.length s;
  }

  (** Decoder reading from the whole of [s]. [s] is used directly, without
      being copied. *)
  let of_bytes s = {
    _default with
    buf = s;
    len = Bytes.length s;
  }

  (** Decoder reading [len] bytes of the string [s], starting at offset [i].
      The bounds are not checked here. *)
  let of_slice s i len = {
    _default with
    buf = Bytes.of_string s; i; len;
  }

  (** Same as {!of_slice} for bytes; [s] is used directly, without being
      copied. *)
  let of_bytes_slice s i len = {
    _default with
    buf = s; i; len;
  }

  (* move the active slice of buffer to the beginning.
    postcondition: dec.i = 0 *)
  let _move_beginning dec =
    Bytes.blit dec.buf dec.i dec.buf 0 dec.len;
    dec.i <- 0

  (** Decoder reading from the input channel [ic] through an internal
      256-byte buffer that is refilled on demand. I/O exceptions are turned
      into an error state. *)
  let of_chan ic =
    let buf = Bytes.make 256 ' ' in
    (* [len] counts the bytes that have actually been read, not the capacity
       of the buffer: it must be 0 until the first refill, otherwise the
       filler characters would be decoded as input *)
    let dec = { _default with buf; len = 0; } in
    let refill () =
      assert (dec.len >= 0);
      (* shift the partial content to the beginning, if any *)
      if dec.len > 0 then _move_beginning dec;
      dec.i <- 0;
      try
        let size = Bytes.length dec.buf - dec.len in
        let n = input ic dec.buf dec.len size in
        if n = 0 then begin
          (* [input] signals end of file by returning 0, not by raising.
             [next] only refills once the buffer is drained, so [size] is
             positive here and 0 cannot mean "no room left" *)
          dec.refill <- _refill_stop;
          Refill_eof
        end else begin
          (* make the freshly read bytes visible to [next] *)
          dec.len <- dec.len + n;
          Refill_read n
        end
      with
      | End_of_file ->
        dec.refill <- _refill_stop;
        Refill_eof
      | e ->
          let err = Printexc.to_string e in
          dec.state <- StateError err;
          Refill_error err
    in
    dec.refill <- refill;
    dec

  (** Decoder with no input source of its own: {!next} returns [Await]
      whenever the buffer is empty, and input is provided with {!feed} or
      {!feed_bytes}. *)
  let manual () =
    { _default with refill = (fun () -> Refill_await); }

  (* append [len'] bytes of [s], starting at offset [j], to the unread input
     of [dec]. [_blit] copies from the type of [s] into bytes. Does nothing
     once the decoder is in an error state. *)
  let _feed _blit dec s j len' =
    match dec.state with
    | StateError _ -> ()
    | Start
    | ReadInt _
    | ReadStringLen
    | ReadString ->
      _move_beginning dec;
      (* resize if needed *)
      if len' + dec.len > Bytes.length dec.buf then (
        let buf' = Bytes.make (2*(len' + dec.len)) ' ' in
        Bytes.blit dec.buf 0 buf' 0 dec.len;
        dec.buf <- buf';
      );
      _blit s j dec.buf dec.len len';
      dec.len <- dec.len + len';
      ()

  (** [feed dec s j len] gives [len] bytes of the string [s], starting at
      offset [j], to the decoder. *)
  let feed = _feed String.blit

  (** Same as {!feed}, reading from bytes. *)
  let feed_bytes = _feed Bytes.blit

  (* how to fail: set state to an error *)
  let _fail : t -> ('a, Buffer.t, unit, result) format4 -> 'a = fun dec fmt ->
    let buf = Buffer.create 16 in
    Printf.kbprintf
      (fun buf ->
        let msg = Buffer.contents buf in
        dec.state <- StateError msg;
        Error msg)
      buf fmt

  let _is_digit_nonzero c =
    Char.code '1' <= Char.code c && Char.code c <= Char.code '9'
  let _is_digit c =
    Char.code '0' <= Char.code c && Char.code c <= Char.code '9'

  (* [Int64.max_int / 10]: below this, any digit can be appended safely *)
  let _max_div_10 = Int64.div Int64.max_int 10_L

  (* last decimal digit of [Int64.max_int] *)
  let _max_mod_10 = Int64.rem Int64.max_int 10_L

  (* [_push_digit ~negative acc c] appends the decimal digit [c] to the
     magnitude [acc]. Returns [None] if the result does not fit in an int64.
     When [negative] is set, the magnitude 2^63 is accepted as well; it is
     represented by [Int64.min_int], whose negation is itself, so that the
     smallest int64 can still be read. A negative [acc] therefore means the
     magnitude is already at its maximum and no digit can follow. *)
  let _push_digit ~negative acc c =
    let d = Int64.of_int (Char.code c - Char.code '0') in
    let last_digit_max =
      if negative then Int64.succ _max_mod_10 else _max_mod_10 in
    let fits =
      Int64.compare acc 0_L >= 0
      && (Int64.compare acc _max_div_10 < 0
          || (Int64.compare acc _max_div_10 = 0
              && Int64.compare d last_digit_max <= 0))
    in
    if fits then Some (Int64.add (Int64.mul acc 10_L) d) else None

  (* return the integer [i] and get ready for the next token *)
  let _yield_int dec i =
    dec.state <- Start;
    dec.cur_i <- 0_L;
    Next (`I i)

  (** [next dec] decodes and returns the next token, refilling the buffer
      when it is empty. Malformed input is never reported by an exception
      raised from the decoding logic itself: it yields [Error msg], after
      which every call returns the same error. *)
  let rec next dec =
    match dec.state with
    | StateError e -> Error e
    | _ when dec.len = 0 -> _refill dec
    | ReadString ->
      (* bulk transfer. [n] is how many bytes we can transfer right now.
         Here [cur_i] is the number of payload bytes already copied *)
      let n = min (Bytes.length dec.cur_s - (Int64.to_int dec.cur_i)) dec.len in
      Bytes.blit dec.buf dec.i dec.cur_s (Int64.to_int dec.cur_i) n;
      dec.i <- dec.i + n;
      dec.cur_i <- Int64.add dec.cur_i (Int64.of_int n);
      dec.len <- dec.len - n;
      if Bytes.length dec.cur_s = (Int64.to_int dec.cur_i)
        then begin
          (* [cur_s] is dropped right away, so it is never mutated again *)
          let s = Bytes.unsafe_to_string dec.cur_s in
          dec.cur_s <- Bytes.empty;
          dec.state <- Start;
          Next (`S s)  (* done! *)
        end
        else next dec
    | _ ->
      (* consume one char *)
      let c = Bytes.get dec.buf dec.i in
      dec.len <- dec.len - 1;
      dec.i <- dec.i + 1;
      begin match dec.state, c with
      | StateError _, _ -> assert false
      | Start, 'd' -> Next `BeginDict
      | Start, 'l' -> Next `BeginList
      | Start, 'e' -> Next `End
      | Start, 'i' ->
        dec.state <- ReadInt AnyInt;
        dec.cur_i <- 0_L;
        next dec
      | Start, '0' ->
        (* NOTE: only a second '0' is rejected afterwards, so a length
           such as "01" is accepted and read as 1 *)
        dec.state <- ReadStringLen;
        dec.cur_i <- 0_L;
        next dec
      | Start, c when _is_digit_nonzero c ->
        dec.state <- ReadStringLen;
        dec.cur_i <- Int64.of_int (Char.code c - Char.code '0');
        next dec
      | Start, c ->
        _fail dec "unexpected char for B-encode expr: %c" c
      | ReadInt AnyInt, '-' ->
        dec.state <- ReadInt NegInt;
        next dec
      | ReadInt AnyInt, '0' ->
        (* allow exactly one leading 0 *)
        dec.state <- ReadInt ZeroInt;
        next dec
      | (ReadInt _ | ReadStringLen), '0' when dec.cur_i = 0_L ->
        _fail dec "forbidden leading 0 while reading integer"
      | ReadInt AnyInt, c when _is_digit c ->
        (* case where we start reading a positive int *)
        assert (dec.cur_i = 0_L);
        dec.cur_i <- Int64.of_int (Char.code c - Char.code '0');
        dec.state <- ReadInt PosInt;
        next dec
      | (ReadInt (PosInt | NegInt) | ReadStringLen), c when _is_digit c ->
        (* add a digit to the integer, refusing to wrap around silently *)
        let negative =
          (match dec.state with ReadInt NegInt -> true | _ -> false) in
        begin match _push_digit ~negative dec.cur_i c with
        | Some acc ->
          dec.cur_i <- acc;
          next dec
        | None ->
          _fail dec "integer overflow while reading integer"
        end
      | ReadInt (PosInt | ZeroInt), 'e' ->
        (* finish reading an int *)
        _yield_int dec dec.cur_i
      | ReadInt NegInt, 'e' ->
        (* NOTE: "i-e" (a sign without any digit) ends up here and yields 0 *)
        _yield_int dec (Int64.neg dec.cur_i)
      | ReadInt AnyInt, 'e' ->
        (* "ie" (no digit at all) is read as 0 *)
        _yield_int dec 0_L
      | ReadStringLen, ':' when dec.cur_i = 0_L ->
        dec.state <- Start;
        Next (`S "")
      | ReadStringLen, ':' ->
        if Int64.compare dec.cur_i (Int64.of_int Sys.max_string_length) > 0
        then
          (* such a string cannot exist: report it instead of letting
             [Bytes.make] raise [Invalid_argument] *)
          _fail dec "string length %Ld is too large" dec.cur_i
        else begin
          (* allocate buffer of the correct size *)
          dec.state <- ReadString;
          dec.cur_s <- Bytes.make (Int64.to_int dec.cur_i) ' ';
          dec.cur_i <- 0_L;
          next dec
        end
      | ReadInt _, c ->
        _fail dec "expected digit or 'e', got %c" c
      | ReadStringLen , c ->
        _fail dec "expected digit or ':', got %c" c
      | ReadString, _ -> assert false
      end

  (* the buffer is empty: ask the refill callback for more input and act on
     its answer *)
  and _refill dec =
    match dec.refill () with
    | Refill_error e -> _fail dec "error during refill: %s" e
    | Refill_read 0 ->
      (* XXX: caution... a callback that keeps answering [Refill_read 0]
         makes this spin; it must report [Refill_eof] or [Refill_await] *)
      _refill dec
    | Refill_read _n -> next dec (* available input, next *)
    | Refill_await -> Await
    | Refill_eof ->
        begin match dec.state with
        | Start -> End  (* ok, no leftover *)
        | ReadStringLen
        | ReadString -> _fail dec "unexpected EOF (was reading a string)"
        | ReadInt _ -> _fail dec "unexpected EOF (was reading an int)"
        | StateError e -> Error e
        end

  (** [iter dec k] calls [k] on every token decoded from [dec], and stops
      silently at the first result that is not a token, be it [End], [Await]
      or [Error _]. *)
  let rec iter dec k =
    match next dec with
    | Next tok -> k tok; iter dec k
    | End | Await | Error _ -> ()

  (** [to_list dec] collects the tokens decoded from [dec], in order, until
      [End] or [Await] is reached. Returns [None] on a decoding error. *)
  let to_list dec =
    let rec iter acc = match next dec with
      | Next tok -> iter (tok::acc)
      | Await
      | End -> Some (List.rev acc)
      | Error _ -> None
    in
    iter []
end

module Easy = struct
  (** Bencode encoding of the token list [l], as a string. *)
  let to_string l =
    let buf = Buffer.create 24 in
    let emit = Encode.put (Encode.to_buf buf) in
    List.iter emit l;
    Buffer.contents buf

  (** Writes the Bencode encoding of the token list [l] on the channel
      [oc]. *)
  let output oc l =
    let emit = Encode.put (Encode.to_chan oc) in
    List.iter emit l

  (** Tokens decoded from the string [s], or [None] if [s] is malformed. *)
  let of_string s =
    s |> Decode.of_string |> Decode.to_list

  (** Same as {!of_string}, reading from bytes. *)
  let of_bytes s =
    s |> Decode.of_bytes |> Decode.to_list

  (** Same as {!of_string}, but failure is reported by an exception.
      @raise Failure carrying the decoder's error message if [s] is
      malformed. *)
  let of_string_exn s =
    let dec = Decode.of_string s in
    (* decode first: the state is only meaningful once decoding has run *)
    let decoded = Decode.to_list dec in
    match decoded, dec.Decode.state with
    | Some tokens, _ -> tokens
    | None, Decode.StateError msg -> failwith msg
    | None, _ -> failwith "invalid decoder state"
end
OCaml

Innovation. Community. Security.