package spurs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file csvec.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196open Common type 'a t = { dim : int; indices : int Dynarray.t; data : 'a Dynarray.t } [@@deriving show, eq] (** A sparse vector, storing the indices of its non-zero data. It contains a sorted [indices] array and a corresponding [data] array. *) let get_dim v = v.dim let get_indices v = v.indices let get_data v = v.data let copy v = { dim = v.dim; indices = Dynarray.copy v.indices; data = Dynarray.copy v.data } (* Exception type *) exception VectorException of string (** {1 Creation functions for CsVec}*) let new_trusted n indices data = let indices = Dynarray.of_array indices in let data = Dynarray.of_array data in { dim = n; indices; data } (* Helper functions for creation *) let new_checked n indices data = let open Dynarray in if not (Utils.is_sorted indices) then Error "Unsorted indices" else if length indices <> length data then Error "Indices and data have unequal lengths" else if length indices > 0 && get_last indices >= n then Error "Indices larger than vector size" else Ok { dim = n; indices; data } (** Create a sparse vector. Returns an error: - If [indices] and [data] lengths differ - If the indices are out of order - If the data is longer than [n] *) let try_new_csvec n indices data = new_checked n (Dynarray.of_array indices) (Dynarray.of_array data) (** Same as {!try_new_csvec}, but raises an error if invalid *) let new_csvec n indices data = match try_new_csvec n indices data with | Ok v -> v | Error s -> raise (VectorException (Printf.sprintf "Could not create sparse matrix: %s" s)) (** Creates a CsVec, sorting indices and data if necessary. Returns an error if the lengths mismatch. *) let new_from_unsorted n indices data = let indices = Dynarray.of_array indices in let data = Dynarray.of_array data in Utils.sort_like indices data; new_checked n indices data (** Create an empty [CsVec] of size [n] for building purposes *) let empty n = new_trusted n [||] [||] (** {1 Indexing and Iteration Functions} *) let nnz (v : 'a t) = Dynarray.length v.data (** Try to index a vector at a specific index. Returns [Some nnz] if index exists. *) let nnz_index (v : 'a t) index = let ( let* ) = Option.bind in let* i = Utils.binary_search v.indices index in Some (Nnz_index.NNZ i) (** Access element at given [NNZ] index, with constant complexity. *) let get_nnz (v : 'a t) (Nnz_index.NNZ i) = Dynarray.get v.data i (** Set element at given [NNZ] index, with constant complexity. *) let set_nnz (v : 'a t) (Nnz_index.NNZ i) x = Dynarray.set v.data i x (** Access element at given index, with logarithmic complexity. *) let get (v : 'a t) index = let ( let* ) = Option.bind in let* i = nnz_index v index in Some (get_nnz v i) (** Set element at given index, with logarithmic complexity. Returns [None] if the index does not exist already as a non-zero.. *) let set (v : 'a t) index x = let ( let* ) = Option.bind in let* i = nnz_index v index in Some (set_nnz v i x) (** [fold f acc v] calls [f acc index data], through each index-data pair in [v] *) let fold f (acc : 'acc) (v : 'a t) = Dynarray.fold_left (fun a (i, d) -> f a i d) acc (Utils.zip v.indices v.data) (** [iter f v] calls [f index data] on each index-data pair in [v]*) let iter f (v : 'a t) = let open Dynarray in for i = 0 to length v.indices do f v.indices.!(i) v.data.!(i) done (** [map f v] returns a new vector with data mapped by [f] *) let map f (v : 'a t) = let indices = Dynarray.copy v.indices in let data = Dynarray.map f v.data in { dim = v.dim; indices; data } (** [map_inplace f v] maps [f] onto [v.data] in place. *) let map_inplace f (v : 'a t) = Utils.map_inplace f v.data (** Count how many elements make [f] return true *) let count f (v : 'a t) = let c = ref 0 in Dynarray.iter (fun x -> if f x then incr c) v.data; !c (** Returns whether a vector is empty. *) let is_empty (v : 'a t) = Dynarray.is_empty v.data (** {1 Building vectors}*) (** Append an element to the sparse vector. The append should preserve the structure. Raises an exception if: - [ind] is lower or equal to the last element of v.indices - [ind] is greater than v.dim *) let append (v : 'a t) ind x = let open Dynarray in if ind >= v.dim then raise (VectorException "Out-of-bounds append"); if length v.indices > 0 && ind <= get_last v.indices then raise (VectorException "Unsorted append"); add_last v.indices ind; add_last v.data x (** {1 Miscellaneous}*) (** Check the sparse structure, namely that: - [indices] are sorted - all [indices] are less than [dim] *) let check_structure (v : 'a t) = if not (Utils.is_sorted v.indices) then Error "Unsorted indices" else if Dynarray.(length v.indices > 0 && get_last v.indices >= v.dim) then Error "Out of bounds index" else Ok () (** {1 Conversions}*) (** Convert this vector into a dense array. The rest of the vector is filled with zeroes.*) let to_dense (v : float t) = let out = Array.make v.dim 0. in iter (Array.set out) v; out (** Convert this vector into a index -> value hashtbl *) let to_hashtbl (v : 'a t) = let out = Hashtbl.create (nnz v) in iter (Hashtbl.add out) v; out (** {1 Normalization}*) (** Compute the L1-norm of [v] *) let l1_norm (v : float t) = v.data |> Dynarray.map abs_float |> Dynarray.fold_left ( +. ) 0. (** Compute the squared L2-norm of [v] *) let squared_l2_norm (v : float t) = v.data |> Dynarray.map (fun x -> Float.pow x 2.) |> Dynarray.fold_left ( +. ) 0. (** Compute the L2-norm of [v] *) let l2_norm (v : float t) = v |> squared_l2_norm |> Float.sqrt (** Compute the p-order norm of [v]*) let norm (v : float t) p = if Dynarray.is_empty v.data then 0. else if Float.is_infinite p then (* p = inf returns max *) if p > 0. then v.data |> Dynarray.map abs_float |> Dynarray.fold_left max (-.infinity) (* p = -inf returns min *) else v.data |> Dynarray.map abs_float |> Dynarray.fold_left min infinity (* p = 0 returns count of nonzero values *) else if p = 0. then count (fun x -> x <> 0.) v |> float_of_int else (* standard norm *) v.data |> Dynarray.map abs_float |> Dynarray.map (fun x -> Float.pow x p) |> Dynarray.fold_left ( +. ) 0. |> fun sum -> Float.pow sum (1. /. p) (** Divides the vector by its own L2-norm in-place. The zero vector is left unchanged *) let normalize (v : float t) = let n = l2_norm v in if n > 0. then map_inplace (fun x -> x /. n) v