Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
(*---------------------------------------------------------------------------
  Copyright (c) 2026 The Raven authors. All rights reserved.
  SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Per-column dtype overrides: an association list from a column name to the
    dtype that column is parsed as instead of an inferred one. *)
type dtype_spec =
  (string * [ `Float32 | `Float64 | `Int32 | `Int64 | `Bool | `String ]) list

(** Cell texts treated as missing when the caller supplies no [na_values]. *)
let default_na_values = [ ""; "NA"; "N/A"; "null"; "NULL"; "nan"; "NaN" ]

(** [is_null_value na_values s] is [true] iff [s] is a member of [na_values].
    The comparison is exact, hence case-sensitive. *)
let is_null_value na_values s = List.mem s na_values

(** [detect_dtype na_values values] infers a column dtype from raw cell text.

    Cells listed in [na_values] are ignored. The remaining cells are tried
    against each candidate in order and the first candidate that accepts every
    cell wins:
    - no remaining cells: [`String];
    - only boolean literals (case-insensitive [true]/[t]/[yes]/[y]/[1] and
      [false]/[f]/[no]/[n]/[0]): [`Bool]. This is tested before integers, so a
      column holding only [0] and [1] is reported as [`Bool];
    - only texts accepted by [Int64.of_string_opt]: [`Int32] when every value
      lies within the [Int32] range, [`Int64] otherwise;
    - only texts accepted by [float_of_string_opt]: [`Float32];
    - anything else: [`String]. *)
let detect_dtype na_values values =
  let is_bool_literal v =
    match String.lowercase_ascii v with
    | "true" | "t" | "yes" | "y" | "1" | "false" | "f" | "no" | "n" | "0" ->
        true
    | _ -> false
  in
  let fits_int32 i =
    Int64.compare i (Int64.of_int32 Int32.min_int) >= 0
    && Int64.compare i (Int64.of_int32 Int32.max_int) <= 0
  in
  (* [Some wide] when every cell is an integer, [wide] telling whether any of
     them falls outside the Int32 range; [None] as soon as one cell is not an
     integer. The [_opt] parser is used so that only a parse failure, and no
     unrelated exception, is turned into "not an integer". *)
  let rec scan_ints wide = function
    | [] -> Some wide
    | v :: rest -> (
        match Int64.of_string_opt v with
        | None -> None
        | Some i -> scan_ints (wide || not (fits_int32 i)) rest)
  in
  let is_float v = Option.is_some (float_of_string_opt v) in
  match List.filter (fun v -> not (is_null_value na_values v)) values with
  | [] -> `String
  | present -> (
      if List.for_all is_bool_literal present then `Bool
      else
        match scan_ints false present with
        | Some true -> `Int64
        | Some false -> `Int32
        | None -> if List.for_all is_float present then `Float32 else `String)
(** [columns_of_rows na_values dtype_spec column_names data_rows] converts
    row-major cell text into a list of [(name, column)] pairs, one per entry
    of [column_names] and in the same order.

    - A row with more cells than [column_names] has its extra cells ignored.
    - A row with fewer cells than [column_names] contributes a null to each
      column it does not reach, so every column ends up with exactly one entry
      per row and values never shift to a different row.
    - The dtype of a column is the one bound to its name in [dtype_spec] when
      there is such a binding, otherwise the result of [detect_dtype] on the
      cells that are present.
    - A cell becomes null when it is absent, is listed in [na_values], or is
      rejected by the parser of the column's dtype. *)
let columns_of_rows na_values dtype_spec column_names data_rows =
  let num_cols = List.length column_names in
  let num_rows = List.length data_rows in
  (* cells.(c).(r) holds the text of column [c] in row [r]; [None] marks a
     cell the row did not supply. Pre-sizing every column to [num_rows] is
     what keeps short rows from misaligning the columns. *)
  let cells = Array.init num_cols (fun _ -> Array.make num_rows None) in
  List.iteri
    (fun r row ->
      List.iteri
        (fun c value -> if c < num_cols then cells.(c).(r) <- Some value)
        row)
    data_rows;
  let bool_of_text v =
    match String.lowercase_ascii v with
    | "true" | "t" | "yes" | "y" | "1" -> Some true
    | "false" | "f" | "no" | "n" | "0" -> Some false
    | _ -> None
  in
  List.mapi
    (fun c name ->
      let column_cells = cells.(c) in
      let dtype =
        match Option.bind dtype_spec (List.assoc_opt name) with
        | Some forced -> forced
        | None ->
            detect_dtype na_values
              (List.filter_map Fun.id (Array.to_list column_cells))
      in
      (* [parse] returns an option so that an unparsable cell becomes a null
         without resorting to a catch-all exception handler. *)
      let parse_col ~parse ~make =
        make
          (Array.map
             (function
               | None -> None
               | Some v -> if is_null_value na_values v then None else parse v)
             column_cells)
      in
      let column =
        match dtype with
        | `Float32 ->
            parse_col ~parse:float_of_string_opt ~make:Talon.Col.float32_opt
        | `Float64 ->
            parse_col ~parse:float_of_string_opt ~make:Talon.Col.float64_opt
        | `Int32 ->
            parse_col ~parse:Int32.of_string_opt ~make:Talon.Col.int32_opt
        | `Int64 ->
            parse_col ~parse:Int64.of_string_opt ~make:Talon.Col.int64_opt
        | `Bool -> parse_col ~parse:bool_of_text ~make:Talon.Col.bool_opt
        | `String -> parse_col ~parse:Option.some ~make:Talon.Col.string_opt
      in
      (name, column))
    column_names

(** [col_string_fns na_repr df] returns, for each column of [df] in
    [Talon.column_names] order, the function obtained from
    [Talon.Col.to_string_fn ~null:na_repr]; the callers below apply each one
    to a row index to get that cell's text. *)
let col_string_fns na_repr df =
  Talon.column_names df
  |> List.map (fun name ->
         Talon.Col.to_string_fn ~null:na_repr (Talon.get_column_exn df name))

(** [df_of_rows ?names ?na_values ?dtype_spec rows] builds a dataframe from
    already-split rows.

    - With [names], every row is data; with no rows the result has one empty
      string column per name.
    - Without [names], the first row is the header; no rows at all gives
      [Talon.empty], and a header with no data rows gives one empty string
      column per header cell.

    [na_values] defaults to [default_na_values]. [dtype_spec] is not consulted
    in the cases that produce empty string columns. *)
let df_of_rows ?names ?(na_values = default_na_values) ?dtype_spec rows =
  let empty_frame column_names =
    column_names
    |> List.map (fun name -> (name, Talon.Col.string [||]))
    |> Talon.create
  in
  let frame column_names data =
    Talon.create (columns_of_rows na_values dtype_spec column_names data)
  in
  match (names, rows) with
  | Some column_names, [] -> empty_frame column_names
  | Some column_names, _ :: _ -> frame column_names rows
  | None, [] -> Talon.empty
  | None, [ header ] -> empty_frame header
  | None, header :: data -> frame header data

(** [of_string ?sep ?names ?na_values ?dtype_spec s] splits [s] into rows with
    [Csv_io.parse ~separator:sep] ([sep] defaults to [',']) and hands them to
    [df_of_rows]. *)
let of_string ?(sep = ',') ?names ?na_values ?dtype_spec s =
  Csv_io.parse ~separator:sep s |> df_of_rows ?names ?na_values ?dtype_spec

(** [to_string ?sep ?na_repr df] renders [df] as text: a row of column names
    followed by one row per data row, each emitted with [Csv_io.write_row].
    Null cells are rendered as [na_repr] (default [""]); [sep] defaults to
    [',']. *)
let to_string ?(sep = ',') ?(na_repr = "") df =
  let buf = Buffer.create 1024 in
  let fns = col_string_fns na_repr df in
  let n_rows = Talon.num_rows df in
  Csv_io.write_row buf sep (Talon.column_names df);
  for i = 0 to n_rows - 1 do
    Csv_io.write_row buf sep (List.map (fun f -> f i) fns)
  done;
  Buffer.contents buf

(** [read ?sep ?names ?na_values ?dtype_spec path] loads the file at [path]
    into a dataframe via [df_of_rows].

    The file is consumed one physical line at a time: each line goes through
    [Csv_io.strip_cr], lines that are then empty are skipped, and the rest are
    split with [Csv_io.parse_row].

    NOTE(review): because rows are split per physical line, a quoted field
    containing a line break would be cut in two here. [of_string] goes through
    [Csv_io.parse] instead - confirm whether the two entry points are meant to
    agree on such input. *)
let read ?(sep = ',') ?names ?na_values ?dtype_spec path =
  In_channel.with_open_text path @@ fun ic ->
  let rec collect acc =
    match In_channel.input_line ic with
    | None -> List.rev acc
    | Some raw ->
        let line = Csv_io.strip_cr raw in
        collect (if line = "" then acc else Csv_io.parse_row sep line :: acc)
  in
  df_of_rows ?names ?na_values ?dtype_spec (collect [])

(** [write ?sep ?na_repr path df] writes [df] to the file at [path]: a row of
    column names followed by one row per data row. Each row is formatted into
    a reused buffer with [Csv_io.write_row] and written out immediately, so
    the full text is never held in memory at once. Null cells are rendered as
    [na_repr] (default [""]); [sep] defaults to [',']. *)
let write ?(sep = ',') ?(na_repr = "") path df =
  Out_channel.with_open_text path @@ fun oc ->
  let buf = Buffer.create 256 in
  let fns = col_string_fns na_repr df in
  let n_rows = Talon.num_rows df in
  let emit row_cells =
    Buffer.clear buf;
    Csv_io.write_row buf sep row_cells;
    output_string oc (Buffer.contents buf)
  in
  emit (Talon.column_names df);
  for i = 0 to n_rows - 1 do
    emit (List.map (fun f -> f i) fns)
  done