package core_profiler

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file reservoir_sampling.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
(** This is a copy of resevoir sampling from [Jane.Order_stats_reservoir_sampling.Int]
    with several non-essential functions removed.

    We can kill this someday when the [Jane] is publicly released. *)
open Core

open Poly

module Make (E : sig
  type t [@@deriving sexp, bin_io, compare]

  val make_array : len:int -> t array
  val set : t array -> int -> t -> unit
end) =
struct
  type element = E.t [@@deriving sexp, bin_io]

  type t =
    { mutable total_samples_seen : int (* total number of samples seen by [add] *)
    ; mutable samples_count : int (* number of samples retained in [samples] *)
    ; mutable samples : E.t array
        (* remembered samples (between the indices 0 and
                                          samples_count-1 inclusive; the other values are
                                          meaningless) *)
    ; mutable samples_are_sorted : bool (* flag to avoid resorting *)
    }
  [@@deriving sexp, bin_io]

  let create ?(num_samples_to_keep = 10_000) () =
    if num_samples_to_keep < 1
    then invalid_arg "num_samples_to_keep must be positive"
    else if num_samples_to_keep > 1_000_000_000
    then invalid_arg "num_samples_to_keep shouldn't be over a billion"
    else
      { total_samples_seen = 0
      ; samples_count = 0
      ; samples = E.make_array ~len:num_samples_to_keep
      ; samples_are_sorted = true
      }
  ;;

  let add t sample =
    t.total_samples_seen <- t.total_samples_seen + 1;
    let index_to_replace = Random.int t.total_samples_seen in
    let len = Array.length t.samples in
    if index_to_replace < len
    then (
      if t.samples_count < len
      then (
        (* t.samples has unoccupied slots *)
        E.set t.samples t.samples_count sample;
        t.samples_count <- t.samples_count + 1)
      else E.set t.samples index_to_replace sample;
      t.samples_are_sorted <- false)
  ;;

  let sort t =
    if not t.samples_are_sorted
    then (
      Array.sort t.samples ~compare:E.compare ~pos:0 ~len:t.samples_count;
      t.samples_are_sorted <- true)
  ;;

  let percentile t p =
    if p < 0. || p > 1.
    then Or_error.error_string "Order_stats.percentile: must be between 0 and 1"
    else if t.samples_count = 0
    then Or_error.error_string "Order_stats.percentile: no samples yet"
    else (
      sort t;
      let index = Float.iround_towards_zero_exn (p *. Float.of_int t.samples_count) in
      let index =
        if index >= t.samples_count (* in case p=1 or rounding error *)
        then t.samples_count - 1
        else index
      in
      Result.Ok t.samples.(index))
  ;;

  let percentile_exn t p = Or_error.ok_exn (percentile t p)

  let distribution t =
    sort t;
    (* looping in reverse to construct the list in sorted order *)
    let rec loop i ls = if i = -1 then ls else loop (i - 1) (t.samples.(i) :: ls) in
    loop (t.samples_count - 1) []
  ;;
end

include Make (struct
  include Int

  let make_array ~len = Array.create ~len 0
  let set (t : int array) i v = t.(i) <- v
end)