package talon

  1. Overview
  2. Docs

Source file talon_json.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
open Talon
open Yojson.Basic

(* Render the cell at row [idx] of column [col] as a JSON value.

   Tensor-backed columns ([Col.P]) are dispatched on their dtype:
   - float dtypes produce [`Float], except NaN which is emitted as [`Null];
   - [Int32] produces [`Int] through [Int32.to_int], and the other dtypes
     that read back as [int array] produce [`Int] directly;
   - [Int64] and [NativeInt] produce a decimal [`String];
   - complex dtypes produce a [`String] formatted with ["%g+%gi"];
   - [Bool] produces [`Bool].
   Optional string and boolean columns ([Col.S], [Col.B]) map [None] to
   [`Null].

   Every tensor lookup calls [Nx.to_array] on the tensor and then indexes
   the resulting array at [idx]. *)
let value_to_json col idx =
  (* Shared by all float-like dtypes: NaN has no JSON spelling here, so it
     becomes [`Null]. *)
  let float_cell (cells : float array) =
    let x = cells.(idx) in
    if classify_float x = FP_nan then `Null else `Float x
  in
  (* Shared by all dtypes that read back as a plain [int array]. *)
  let int_cell (cells : int array) = `Int cells.(idx) in
  (* Shared by all complex dtypes. *)
  let complex_cell (cells : Complex.t array) =
    let z = cells.(idx) in
    `String (Printf.sprintf "%g+%gi" z.Complex.re z.Complex.im)
  in
  match col with
  | Col.S cells -> (
      match cells.(idx) with None -> `Null | Some s -> `String s)
  | Col.B cells -> (
      match cells.(idx) with None -> `Null | Some b -> `Bool b)
  | Col.P (dtype, tensor, _) -> (
      (* One arm per constructor: each arm refines the element type of
         [tensor] differently, so the arms are kept separate. *)
      match dtype with
      (* Floating point *)
      | Nx.Float8_e4m3 -> float_cell (Nx.to_array tensor)
      | Nx.Float8_e5m2 -> float_cell (Nx.to_array tensor)
      | Nx.Float16 -> float_cell (Nx.to_array tensor)
      | Nx.BFloat16 -> float_cell (Nx.to_array tensor)
      | Nx.Float32 -> float_cell (Nx.to_array tensor)
      | Nx.Float64 -> float_cell (Nx.to_array tensor)
      (* Integers that read back as [int] *)
      | Nx.Int4 -> int_cell (Nx.to_array tensor)
      | Nx.UInt4 -> int_cell (Nx.to_array tensor)
      | Nx.Int8 -> int_cell (Nx.to_array tensor)
      | Nx.UInt8 -> int_cell (Nx.to_array tensor)
      | Nx.Int16 -> int_cell (Nx.to_array tensor)
      | Nx.UInt16 -> int_cell (Nx.to_array tensor)
      | Nx.Int -> int_cell (Nx.to_array tensor)
      | Nx.QInt8 -> int_cell (Nx.to_array tensor)
      | Nx.QUInt8 -> int_cell (Nx.to_array tensor)
      (* Boxed integers *)
      | Nx.Int32 ->
          let cells : int32 array = Nx.to_array tensor in
          `Int (Int32.to_int cells.(idx))
      | Nx.Int64 ->
          let cells : int64 array = Nx.to_array tensor in
          `String (Int64.to_string cells.(idx))
      | Nx.NativeInt ->
          let cells : nativeint array = Nx.to_array tensor in
          `String (Nativeint.to_string cells.(idx))
      (* Complex *)
      | Nx.Complex16 -> complex_cell (Nx.to_array tensor)
      | Nx.Complex32 -> complex_cell (Nx.to_array tensor)
      | Nx.Complex64 -> complex_cell (Nx.to_array tensor)
      (* Booleans *)
      | Nx.Bool ->
          let cells : bool array = Nx.to_array tensor in
          `Bool cells.(idx))

(* Serialise the dataframe [df] to a JSON string.

   [orient] selects the layout (default [`Records]):
   - [`Records]: a JSON array with one object per row, each object mapping
     column name to that row's cell;
   - [`Columns]: a JSON object mapping each column name to the array of
     that column's cells.

   Cells are converted with [value_to_json]; the resulting tree is printed
   with [Yojson.Basic.to_string]. *)
let to_string ?(orient = `Records) df =
  let n_rows = num_rows df in
  (* Resolve each column once up front instead of once per row: the lookup
     does not depend on the row index. *)
  let columns =
    List.map
      (fun col_name -> (col_name, get_column_exn df col_name))
      (column_names df)
  in
  let json : Yojson.Basic.t =
    match orient with
    | `Records ->
        (* Row-oriented: list of objects *)
        `List
          (List.init n_rows (fun row ->
               `Assoc
                 (List.map
                    (fun (col_name, col) -> (col_name, value_to_json col row))
                    columns)))
    | `Columns ->
        (* Column-oriented: object with column arrays *)
        `Assoc
          (List.map
             (fun (col_name, col) ->
               ( col_name,
                 `List (List.init n_rows (fun row -> value_to_json col row))
               ))
             columns)
  in
  Yojson.Basic.to_string json

(* Infer a column type from the JSON values of one column.

   [`Null] entries are ignored. The result is:
   - [`String] when there is no non-null value (default for all-null
     columns);
   - [`Bool] when every non-null value is a [`Bool];
   - [`Int32] when every non-null value is an [`Int] that fits in 32 bits;
   - [`String] when every non-null value is an [`Int] but at least one does
     not fit in 32 bits, so that all digits are preserved rather than
     silently wrapped by a later [Int32.of_int];
   - [`Float32] when every non-null value is a [`Float] or an [`Int];
   - [`String] otherwise (strings and mixed content). *)
let detect_json_dtype values =
  (* Round-tripping through [Int32] is the identity exactly when the value
     is representable in 32 bits. *)
  let fits_int32 i = Int32.to_int (Int32.of_int i) = i in
  match List.filter (function `Null -> false | _ -> true) values with
  | [] -> `String
  | present ->
      let every pred = List.for_all pred present in
      if every (function `Bool _ -> true | _ -> false) then `Bool
      else if every (function `Int i -> fits_int32 i | _ -> false) then
        `Int32
      else if every (function `Int _ -> true | _ -> false) then
        (* All integers, but too wide for int32. *)
        `String
      else if every (function `Float _ | `Int _ -> true | _ -> false) then
        `Float32
      else `String

(* Build one column from the JSON values of a single field.

   The column type is chosen by [detect_json_dtype]:
   - [`Float32]: floats are kept, integers are widened with
     [float_of_int], anything else becomes a missing cell;
   - [`Int32]: integers are converted with [Int32.of_int], anything else
     becomes a missing cell;
   - [`Bool]: booleans are kept, anything else becomes a missing cell;
   - any other result: a string column where strings are kept, integers,
     floats and booleans are rendered with [string_of_int],
     [string_of_float] and [string_of_bool], and every other value (null,
     nested arrays or objects) becomes a missing cell. *)
let column_of_json_values values =
  let collect convert = Array.of_list (List.map convert values) in
  match detect_json_dtype values with
  | `Float32 ->
      Col.float32_opt
        (collect (function
          | `Float f -> Some f
          | `Int i -> Some (float_of_int i)
          | _ -> None))
  | `Int32 ->
      Col.int32_opt
        (collect (function `Int i -> Some (Int32.of_int i) | _ -> None))
  | `Bool -> Col.bool_opt (collect (function `Bool b -> Some b | _ -> None))
  | _ ->
      (* String or mixed types *)
      Col.string_opt
        (collect (function
          | `String s -> Some s
          | `Int i -> Some (string_of_int i)
          | `Float f -> Some (string_of_float f)
          | `Bool b -> Some (string_of_bool b)
          | _ -> None))

(* Parse a JSON string into a dataframe.

   [orient] selects the expected layout (default [`Records]):
   - [`Records]: a JSON array of objects. Column names and their order are
     taken from the first object only; a key missing from a later object,
     or an array element that is not an object, yields a missing cell.
     Keys that appear only in later objects are ignored.
   - [`Columns]: a JSON object mapping each column name to an array of
     cells.

   An empty array / empty object yields [empty].

   Raises [Failure] when the top-level value does not have the expected
   shape, when the first record is not an object, or when a column is not
   an array. Errors raised by [Yojson.Basic.from_string] on malformed
   input are not caught here. *)
let from_string ?(orient = `Records) json_str =
  let json = Yojson.Basic.from_string json_str in
  match orient with
  | `Records -> (
      (* Row-oriented: expect list of objects *)
      match json with
      | `List [] -> empty
      | `List (first :: _ as records) ->
          (* Get column names from first record *)
          let col_names =
            match first with
            | `Assoc fields -> List.map fst fields
            | _ -> failwith "Invalid JSON: expected object in records array"
          in
          (* Value of [col_name] in one record, [`Null] when absent. *)
          let field_of col_name = function
            | `Assoc fields -> (
                match List.assoc_opt col_name fields with
                | Some value -> value
                | None -> `Null)
            | _ -> `Null
          in
          create
            (List.map
               (fun col_name ->
                 let values = List.map (field_of col_name) records in
                 (col_name, column_of_json_values values))
               col_names)
      | _ -> failwith "Invalid JSON: expected array for records orientation")
  | `Columns -> (
      (* Column-oriented: expect object with column arrays *)
      match json with
      | `Assoc [] -> empty
      | `Assoc fields ->
          create
            (List.map
               (fun (col_name, values) ->
                 match values with
                 | `List vals -> (col_name, column_of_json_values vals)
                 | _ ->
                     failwith
                       (Printf.sprintf
                          "Invalid JSON: column %s is not an array" col_name))
               fields)
      | _ -> failwith "Invalid JSON: expected object for columns orientation")

(* Write the dataframe [df] to [file] as JSON.

   [orient] is forwarded to [to_string]. The JSON text is produced before
   the file is opened, so a serialisation failure does not touch [file].
   The channel is closed even if writing raises; an error reported by the
   final [close_out] (e.g. a failed flush) is still propagated. *)
let to_file ?orient df file =
  let json_string = to_string ?orient df in
  let oc = open_out file in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc json_string;
      (* Explicit close so flush errors surface; the [finally] handler is
         then a no-op on the already-closed channel. *)
      close_out oc)

(* Read [file] and parse its contents as a JSON dataframe.

   [orient] is forwarded to [from_string]. The file is opened in binary
   mode so that [in_channel_length] matches the number of bytes actually
   read, and the channel is closed even if reading raises. *)
let from_file ?orient file =
  let ic = open_in_bin file in
  let contents =
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  in
  from_string ?orient contents