package hector

  1. Overview
  2. Docs

Source file IntArray.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# 1 "IntArray.cppo.ml"
(******************************************************************************)
(*                                                                            *)
(*                                   Hector                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*                                                                            *)
(*       Copyright 2024--2024 Inria. All rights reserved. This file is        *)
(*       distributed under the terms of the GNU Library General Public        *)
(*       License, with an exception, as described in the file LICENSE.        *)
(*                                                                            *)
(******************************************************************************)

# 1 "Loop.frag.ml"
(******************************************************************************)
(*                                                                            *)
(*                                   Hector                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*                                                                            *)
(*       Copyright 2024--2024 Inria. All rights reserved. This file is        *)
(*       distributed under the terms of the GNU Library General Public        *)
(*       License, with an exception, as described in the file LICENSE.        *)
(*                                                                            *)
(******************************************************************************)

(* An ordinary loop, from [start] (included) to [finish] (excluded).
   The loop index is named [i], and can be used in the loop [body]. *)


# 22 "Loop.frag.ml"
(* An unrolled loop, with the same semantics. The loop is unrolled
   five times. *)

(* Unrolling a loop five times lets us avoid a bizarre slowness that
   we have observed on arm64 processors, including Apple M1 and M2;
   see https://github.com/ocaml/ocaml/issues/13262 *)


# 49 "Loop.frag.ml"
(* An ordinary loop, from [start] (included) to [finish] (excluded).
   The loop index is named [i], and can be used in the loop body. The
   loop body is of the form [let data = read in write]. We assume that
   the iterations are independent (this is a parallel loop), so a read
   in one iteration commutes with the write in a previous iteration. *)


# 61 "Loop.frag.ml"
(* An unrolled loop, with the same semantics. The loop is unrolled five times.
   We schedule the five reads before the five writes, in the hope of reducing
   the latency caused by load instructions and (perhaps) allowing the compiler
   to merge several memory barriers into one. *)


# 91 "Loop.frag.ml"
(* An ordinary loop, from [finish] (excluded) down to [start] (included).
   The loop index is named [i], and can be used in the loop [body]. *)

# 1 "ValidateSegment.frag.ml"
(******************************************************************************)
(*                                                                            *)
(*                                   Hector                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*                                                                            *)
(*       Copyright 2024--2024 Inria. All rights reserved. This file is        *)
(*       distributed under the terms of the GNU Library General Public        *)
(*       License, with an exception, as described in the file LICENSE.        *)
(*                                                                            *)
(******************************************************************************)

(* -------------------------------------------------------------------------- *)

(**[validate_segment n ofs len] checks that the offset [ofs] and the
   length [len] determine a valid interval inside an array or vector of
   length [n]. *)

let[@inline never] invalid_segment n ofs len =
  Printf.ksprintf invalid_arg
    "invalid segment (ofs = %d, len = %d) in a sequence of length %d"
    ofs len n

let[@inline] validate_segment n ofs len =
  if not (0 <= len && 0 <= ofs && ofs + len <= n) then
    invalid_segment n ofs len
  
# 15 "IntArray.cppo.ml"
  (* needed by MonoArray.frag.ml *)

(* Our implementation of integer arrays is compiled by the OCaml compiler with
   the knowledge that elements have type [int]. As a result, we get better
   machine code. In particular, when reading and writing arrays, the special
   case of floating-point arrays (tagged 254) disappears; and when writing
   arrays, the write barrier (_caml_modify) vanishes. *)

(* -------------------------------------------------------------------------- *)

(* Types. *)

type element = int
type dummy = element
type t = element array
type length = int
type index = int
type offset = int

(* -------------------------------------------------------------------------- *)

(* [unsafe_fill_bytes b o n c] fills the buffer [b], at offset [o], with [n]
   copies of the character [c]. It is implemented in runtime/str.c as a call
   to [memset]. *)

external unsafe_fill_bytes :
  Bytes.t ->
  (* offset in bytes: *) int  ->
  (* length in bytes: *) int  ->
  (* value:           *) char ->
  unit = "caml_fill_bytes"

(* [unsafe_initialize_int_array_segment a o n] initializes the array segment
   determined by array [a], offset [o], and length [n], with arbitrary (valid)
   integer values. *)

let unsafe_initialize_int_array_segment (a : int array) (o : int) (n : int) =
  (* Translate offset and length into bytes. *)
  let o = o * (Sys.word_size / 8) in
  let n = n * (Sys.word_size / 8) in
  (* Fill the array with odd bytes, which are valid integers. *)
  unsafe_fill_bytes (Obj.magic a) o n '\001'

(* -------------------------------------------------------------------------- *)

(* Allocation: [empty], [alloc], and [make]. *)

let empty =
  [||]

(* Instead of [Array.make], we use an unorthodox method to allocate a custom
   block, which the garbage collector does not scan, and disguise it as an
   integer array. The arrays slots are uninitialized; they may contain
   arbitrary data. *)

let alloc (n : length) (_dummy : element) : t =
  assert (0 <= n);
  (* Allocate an uninitialized memory block, which the GC does not scan. *)
  let a = Obj.new_block Obj.abstract_tag n in
  (* Cast it to the type [int array]. *)
  let a : int array = Obj.obj a in
  (* Initialize it. *)
  (* We cannot use [Array.fill], as it can (in some circumstances) read and
     interpret the previous content of the array. A simple loop would work,
     but would be a bit slow (not vectorized; with a safe point). [memset]
     is faster. *)
  unsafe_initialize_int_array_segment a 0 n;
  (* Done. *)
  a

let make (n : length) (x : element) : t =
  assert (0 <= n);
  (* Allocate an uninitialized memory block, which the GC does not scan. *)
  let a = Obj.new_block Obj.abstract_tag n in
  (* Cast it to the type [int array]. *)
  let a : int array = Obj.obj a in
  (* Initialize it. *)
  (* As above, we cannot use [Array.fill]. There is no [memset64] in C.
     So, we use a loop. *)
  
# 94 "IntArray.cppo.ml"
   (
  let __finish = ( n) in
  let __index = ref ( 0) in
  let __limit = __finish - 5 in
  while !__index <= __limit do
    let __this = !__index in
    (let i = __this + 0 in  Array.unsafe_set a i x (* safe *));
    (let i = __this + 1 in  Array.unsafe_set a i x (* safe *));
    (let i = __this + 2 in  Array.unsafe_set a i x (* safe *));
    (let i = __this + 3 in  Array.unsafe_set a i x (* safe *));
    (let i = __this + 4 in  Array.unsafe_set a i x (* safe *));
    __index := __this + 5
  done;
  let __finish = __limit + 5 in
  while !__index < __finish do
    (let i = !__index + 0 in  Array.unsafe_set a i x (* safe *));
    __index := !__index + 1
  done
) 
# 94 "IntArray.cppo.ml"
                                                   ;
  (* Done. *)
  a

(* -------------------------------------------------------------------------- *)

(* Then comes our implementation of monomorphic arrays. *)

(* #defining IMMEDIATE tells MonoArray that we are dealing with an immediate
   type, that is, a non-pointer type. *)


# 1 "MonoArray.frag.ml"
(******************************************************************************)
(*                                                                            *)
(*                                   Hector                                   *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*                                                                            *)
(*       Copyright 2024--2024 Inria. All rights reserved. This file is        *)
(*       distributed under the terms of the GNU Library General Public        *)
(*       License, with an exception, as described in the file LICENSE.        *)
(*                                                                            *)
(******************************************************************************)

(* This file implements several functions on arrays, based on [alloc],
   [unsafe_get], and [unsafe_set]. We also need the empty array, [empty]. *)

(* We ensure that [alloc] is the only function that constructs arrays.
   This allows the user to provide exotic (and possibly unorthodox)
   array construction methods. *)

(* -------------------------------------------------------------------------- *)

(* [length], [unsafe_get], [unsafe_set] are taken from [Stdlib.Array]. *)

(* Type annotations ensure that we perform monomorphic
   array accesses only. (These produce better code.) *)

let length =
  Array.length

let[@inline] unsafe_get (a : t) i =
  Array.unsafe_get a i

let[@inline] unsafe_set (a : t) i x =
  Array.unsafe_set a i x

(* -------------------------------------------------------------------------- *)

(* [blit]. *)


# 42 "MonoArray.frag.ml"
(* If the type [element] is immediate (i.e., not a pointer type) then
   [memmove] can be used. *)

(* In the case of integer arrays, [memmove] can be 4 times faster than
   hand-written loop, and 12 times faster than [Array.blit]. *)

external unsafe_blit :
  int array -> int ->
  int array -> int ->
  int ->
  unit
= "hector_array_blit"

let blit (src : t) sofs dst dofs n =
  validate_segment (length src) sofs n;
  validate_segment (length dst) dofs n;
  unsafe_blit src sofs dst dofs n


# 75 "MonoArray.frag.ml"
(* -------------------------------------------------------------------------- *)

(* We implement [init] and [sub] using [alloc], so that [alloc] is our
   single factory function for arrays. *)

let init n f =
  assert (0 <= n);
  if n = 0 then empty else
  let x = f 0 in
  let a = alloc n x in
  unsafe_set a 0 x; (* safe *)
  
# 86 "MonoArray.frag.ml"
   (
  let __finish = ( n) in
  let __index = ref ( 1) in
  let __limit = __finish - 5 in
  while !__index <= __limit do
    let __this = !__index in
    (let i = __this + 0 in  unsafe_set a i (f i) (* safe *));
    (let i = __this + 1 in  unsafe_set a i (f i) (* safe *));
    (let i = __this + 2 in  unsafe_set a i (f i) (* safe *));
    (let i = __this + 3 in  unsafe_set a i (f i) (* safe *));
    (let i = __this + 4 in  unsafe_set a i (f i) (* safe *));
    __index := __this + 5
  done;
  let __finish = __limit + 5 in
  while !__index < __finish do
    (let i = !__index + 0 in  unsafe_set a i (f i) (* safe *));
    __index := !__index + 1
  done
) 
# 86 "MonoArray.frag.ml"
                                                 ;
  a

(* [sub a o n] is equivalent to [init n (fun i -> A.get a (o + i))]. *)

let sub a o n =
  validate_segment (length a) o n;
  if n = 0 then empty else
  let dummy = unsafe_get a o in (* safe *)
  let a' = alloc n dummy in
  blit a o a' 0 n;
  a'

(* -------------------------------------------------------------------------- *)

(* [fill] is taken from [Stdlib.Array]. *)

(* Because there is no word-sized variant of [memset] in C, we cannot
   implement [fill] in a more efficient way, even when IMMEDIATE is
   #defined. *)

let[@inline] fill (a : t) o k x =
  Array.fill a o k x

# 109 "IntArray.cppo.ml"
(* -------------------------------------------------------------------------- *)

(* [grow] allocates a semi-initialized array. *)

(* Of course, [grow] can always be implemented by a combination of [alloc]
   and [blit]. Our implementation (below) can in theory be more efficient,
   as the lower segment is written just once, instead of twice. In practice,
   we observe that it yields only a 1% performance improvement on the [push]
   benchmark. *)

let grow (n : length) (_dummy : element) (s : t) (k : length) : t =
  assert (0 <= k && k <= n);
  (* Allocate an uninitialized memory block, which the GC does not scan. *)
  let a = Obj.new_block Obj.abstract_tag n in
  (* Cast it to the type [int array]. *)
  let a : int array = Obj.obj a in
  (* Initialize the lower segment by copying data from [s]. *)
  unsafe_blit s 0 a 0 k;
  (* Initialize the upper segment with arbitrary integer values. *)
  unsafe_initialize_int_array_segment a k (n - k);
  (* Done. *)
  a
OCaml

Innovation. Community. Security.