package zarr

  1. Overview
  2. Docs
An OCaml implementation of the Zarr V3 specification

Install

Dune Dependency

Authors

Maintainers

Sources

v0.1.0.tar.gz
md5=df6bb0048a4479632c2867d5259c9b27
sha512=341b9db6910a90bb3663c36ae75afb84324c52980b7c6866809424f40cdcc4490eb1f606f5d2a3b1cc91e54671bb09cfc9feae3d9bb55474a66762658d26860c

Description

The Zarr library provides an OCaml implementation of the Zarr version 3 storage format specification for chunked & compressed multi-dimensional arrays, designed for use in parallel computing.

Tags

zarr chunked arrays zarr version 3

Published: 07 Sep 2024

README

README.md

zarr-ml

This library provides an OCaml implementation of the Zarr version 3 storage format specification for chunked & compressed multi-dimensional arrays, designed for use in parallel computing.

Features

  • Supports creating n-dimensional Zarr arrays and chunking them along any dimension.

  • Compresses chunks using a variety of supported compression codecs.

  • Supports indexing operations to read/write views of a Zarr array.

  • Supports storing arrays in memory or on the local filesystem. It is also extensible, allowing users to easily create and use their own custom storage backends. See the example implementing a Zip file store for more details.

  • Supports both synchronous and concurrent I/O via Lwt and Eio.

  • Leverages the strong type system of OCaml to create a type-safe API, making it impossible to create, read, or write malformed arrays.

  • Supports organizing arrays into hierarchies via groups.

Documentation

API documentation can be found here. The full specification of the storage format can be found there.

Installation

To install the development version using the latest git commit, do

# for zarr-sync
 opam pin add zarr-sync git+https://github.com/zoj613/zarr-ml 
# for zarr-lwt
 opam pin add zarr-lwt git+https://github.com/zoj613/zarr-ml 
# for zarr-eio
 opam pin add zarr-eio git+https://github.com/zoj613/zarr-ml 

Quick start

Below is a demonstration of the library's API for synchronous reads/writes. A similar example using the Lwt-backed asynchronous API can be found here.

setup

open Zarr
open Zarr.Metadata
open Zarr.Node
open Zarr.Codecs
open Zarr.Indexing
open Zarr_sync.Storage
(* opens infix operators >>= and >>| for monadic bind & map *)
open FilesystemStore.Deferred.Infix

let store = FilesystemStore.create "testdata.zarr";;

create group

let group_node = GroupNode.of_path "/some/group";;
FilesystemStore.create_group store group_node;;

create an array

let array_node = ArrayNode.(group_node / "name");;
(* creates an array with char data type and fill value '?' *)
FilesystemStore.create_array
  ~codecs:[`Transpose [|2; 0; 1|]; `Bytes BE; `Gzip L2]
  ~shape:[|100; 100; 50|]
  ~chunks:[|10; 15; 20|]
  Ndarray.Char 
  '?'
  array_node
  store;;

read/write from/to an array

let slice = [|R [|0; 20|]; I 10; R [||]|];;
let x = FilesystemStore.read_array store array_node slice Ndarray.Char;;
(* Do some computation on the array slice *)
let x' = Zarr.Ndarray.map (fun _ -> Random.int 256 |> Char.chr) x;;
FilesystemStore.write_array store array_node slice x';;
let y = FilesystemStore.read_array store array_node slice Ndarray.Char;;
assert (Ndarray.equal x' y);;

create an array with sharding

let config =
  {chunk_shape = [|5; 3; 5|]
  ;codecs = [`Transpose [|2; 0; 1|]; `Bytes LE; `Gzip L5]
  ;index_codecs = [`Bytes BE; `Crc32c]
  ;index_location = Start};;

let shard_node = ArrayNode.(group_node / "another");;

FilesystemStore.create_array
  ~codecs:[`ShardingIndexed config]
  ~shape:[|100; 100; 50|]
  ~chunks:[|10; 15; 20|]
  Ndarray.Complex32
  Complex.zero
  shard_node
  store;;

exploratory functions

let a, g = FilesystemStore.find_all_nodes store;;
List.map ArrayNode.to_path a;;
(*- : string list = ["/some/group/name"; "/some/group/another"] *)
List.map GroupNode.to_path g;;
(*- : string list = ["/"; "/some"; "/some/group"] *)

FilesystemStore.reshape store array_node [|25; 32; 10|];;

let meta = FilesystemStore.group_metadata store group_node;;
GroupMetadata.show meta;; (* pretty prints the contents of the metadata *)

FilesystemStore.array_exists store shard_node;;
FilesystemStore.group_exists store group_node;;

let a, g = FilesystemStore.find_child_nodes store group_node;;
List.map ArrayNode.to_path a;;
(*- : string list = ["/some/group/name"; "/some/group/another"] *)
List.map GroupNode.to_path g;;
(*- : string list = [] *)

FilesystemStore.erase_group_node store group_node;;
FilesystemStore.erase_all_nodes store;; (* clears the store *)

Dependencies (6)

  1. checkseum >= "0.4.0"
  2. stdint >= "0.7.2"
  3. ezgzip >= "0.2.0"
  4. yojson >= "1.6.0"
  5. ocaml >= "4.14.0"
  6. dune >= "3.15"

Dev Dependencies (4)

  1. bisect_ppx dev & >= "2.5.0" & with-test
  2. ppx_deriving with-test
  3. ounit2 with-test
  4. odoc with-doc

Conflicts

None

OCaml

Innovation. Community. Security.