package spurs

  1. Overview
  2. Docs

Source file csvec.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
open Common

type 'a t = { dim : int; indices : int Dynarray.t; data : 'a Dynarray.t }
[@@deriving show, eq]
(** A sparse vector, storing the indices of its non-zero data.

    It contains a sorted [indices] array and a corresponding [data] array. *)

let get_dim v = v.dim
let get_indices v = v.indices
let get_data v = v.data

let copy v =
  { dim = v.dim; indices = Dynarray.copy v.indices; data = Dynarray.copy v.data }

(* Exception type *)
exception VectorException of string

(** {1 Creation functions for CsVec}*)

let new_trusted n indices data =
  let indices = Dynarray.of_array indices in
  let data = Dynarray.of_array data in
  { dim = n; indices; data }

(* Helper functions for creation *)
let new_checked n indices data =
  let open Dynarray in
  if not (Utils.is_sorted indices) then Error "Unsorted indices"
  else if length indices <> length data then Error "Indices and data have unequal lengths"
  else if length indices > 0 && get_last indices >= n then
    Error "Indices larger than vector size"
  else Ok { dim = n; indices; data }

(** Create a sparse vector.

    Returns an error:
    - If [indices] and [data] lengths differ
    - If the indices are out of order
    - If the data is longer than [n] *)
let try_new_csvec n indices data =
  new_checked n (Dynarray.of_array indices) (Dynarray.of_array data)

(** Same as {!try_new_csvec}, but raises an error if invalid *)
let new_csvec n indices data =
  match try_new_csvec n indices data with
  | Ok v -> v
  | Error s ->
      raise (VectorException (Printf.sprintf "Could not create sparse matrix: %s" s))

(** Creates a CsVec, sorting indices and data if necessary.

    Returns an error if the lengths mismatch. *)
let new_from_unsorted n indices data =
  let indices = Dynarray.of_array indices in
  let data = Dynarray.of_array data in
  Utils.sort_like indices data;
  new_checked n indices data

(** Create an empty [CsVec] of size [n] for building purposes *)
let empty n = new_trusted n [||] [||]

(** {1 Indexing and Iteration Functions} *)

let nnz (v : 'a t) = Dynarray.length v.data

(** Try to index a vector at a specific index. Returns [Some nnz] if index exists. *)
let nnz_index (v : 'a t) index =
  let ( let* ) = Option.bind in
  let* i = Utils.binary_search v.indices index in
  Some (Nnz_index.NNZ i)

(** Access element at given [NNZ] index, with constant complexity. *)
let get_nnz (v : 'a t) (Nnz_index.NNZ i) = Dynarray.get v.data i

(** Set element at given [NNZ] index, with constant complexity. *)
let set_nnz (v : 'a t) (Nnz_index.NNZ i) x = Dynarray.set v.data i x

(** Access element at given index, with logarithmic complexity. *)
let get (v : 'a t) index =
  let ( let* ) = Option.bind in
  let* i = nnz_index v index in
  Some (get_nnz v i)

(** Set element at given index, with logarithmic complexity. Returns [None] if the index
    does not exist already as a non-zero.. *)
let set (v : 'a t) index x =
  let ( let* ) = Option.bind in
  let* i = nnz_index v index in
  Some (set_nnz v i x)

(** [fold f acc v] calls [f acc index data], through each index-data pair in [v] *)
let fold f (acc : 'acc) (v : 'a t) =
  Dynarray.fold_left (fun a (i, d) -> f a i d) acc (Utils.zip v.indices v.data)

(** [iter f v] calls [f index data] on each index-data pair in [v]*)
let iter f (v : 'a t) =
  let open Dynarray in
  for i = 0 to length v.indices do
    f v.indices.!(i) v.data.!(i)
  done

(** [map f v] returns a new vector with data mapped by [f] *)
let map f (v : 'a t) =
  let indices = Dynarray.copy v.indices in
  let data = Dynarray.map f v.data in
  { dim = v.dim; indices; data }

(** [map_inplace f v] maps [f] onto [v.data] in place. *)
let map_inplace f (v : 'a t) = Utils.map_inplace f v.data

(** Count how many elements make [f] return true *)
let count f (v : 'a t) =
  let c = ref 0 in
  Dynarray.iter (fun x -> if f x then incr c) v.data;
  !c

(** Returns whether a vector is empty. *)
let is_empty (v : 'a t) = Dynarray.is_empty v.data

(** {1 Building vectors}*)

(** Append an element to the sparse vector. The append should preserve the structure.

    Raises an exception if:
    - [ind] is lower or equal to the last element of v.indices
    - [ind] is greater than v.dim *)
let append (v : 'a t) ind x =
  let open Dynarray in
  if ind >= v.dim then raise (VectorException "Out-of-bounds append");
  if length v.indices > 0 && ind <= get_last v.indices then
    raise (VectorException "Unsorted append");
  add_last v.indices ind;
  add_last v.data x

(** {1 Miscellaneous}*)

(** Check the sparse structure, namely that:
    - [indices] are sorted
    - all [indices] are less than [dim] *)
let check_structure (v : 'a t) =
  if not (Utils.is_sorted v.indices) then Error "Unsorted indices"
  else if Dynarray.(length v.indices > 0 && get_last v.indices >= v.dim) then
    Error "Out of bounds index"
  else Ok ()

(** {1 Conversions}*)

(** Convert this vector into a dense array.

    The rest of the vector is filled with zeroes.*)
let to_dense (v : float t) =
  let out = Array.make v.dim 0. in
  iter (Array.set out) v;
  out

(** Convert this vector into a index -> value hashtbl *)
let to_hashtbl (v : 'a t) =
  let out = Hashtbl.create (nnz v) in
  iter (Hashtbl.add out) v;
  out

(** {1 Normalization}*)

(** Compute the L1-norm of [v] *)
let l1_norm (v : float t) =
  v.data |> Dynarray.map abs_float |> Dynarray.fold_left ( +. ) 0.

(** Compute the squared L2-norm of [v] *)
let squared_l2_norm (v : float t) =
  v.data |> Dynarray.map (fun x -> Float.pow x 2.) |> Dynarray.fold_left ( +. ) 0.

(** Compute the L2-norm of [v] *)
let l2_norm (v : float t) = v |> squared_l2_norm |> Float.sqrt

(** Compute the p-order norm of [v]*)
let norm (v : float t) p =
  if Dynarray.is_empty v.data then 0.
  else if Float.is_infinite p then
    (* p = inf returns max *)
    if p > 0. then v.data |> Dynarray.map abs_float |> Dynarray.fold_left max (-.infinity)
    (* p = -inf returns min *)
      else v.data |> Dynarray.map abs_float |> Dynarray.fold_left min infinity
      (* p = 0 returns count of nonzero values *)
  else if p = 0. then count (fun x -> x <> 0.) v |> float_of_int
  else
    (* standard norm *)
    v.data |> Dynarray.map abs_float
    |> Dynarray.map (fun x -> Float.pow x p)
    |> Dynarray.fold_left ( +. ) 0.
    |> fun sum -> Float.pow sum (1. /. p)

(** Divides the vector by its own L2-norm in-place. The zero vector is left unchanged *)
let normalize (v : float t) =
  let n = l2_norm v in
  if n > 0. then map_inplace (fun x -> x /. n) v