package saga

  1. Overview
  2. Docs
Text processing and NLP extensions for Nx

Install

dune-project
 Dependency

Authors

Maintainers

Sources

raven-1.0.0.alpha2.tbz
sha256=93abc49d075a1754442ccf495645bc4fdc83e4c66391ec8aca8fa15d2b4f44d2
sha512=5eb958c51f30ae46abded4c96f48d1825f79c7ce03f975f9a6237cdfed0d62c0b4a0774296694def391573d849d1f869919c49008acffca95946b818ad325f6f

doc/src/saga.tokenizers/chars.ml.html

Source file chars.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
(** Handle for the character-level tokenizer. It is an alias for [unit]: the
    tokenizer carries no data of its own. *)
type t = unit

(** [create ()] returns the tokenizer handle, which is simply [()]. *)
let create () = ()

(** [tokenize () text] splits [text] into one token per byte.

    Each token is a triple [(id, str, (start, stop))] where [id] is the code of
    the byte ([Char.code]), [str] is the one-character string holding that
    byte, and [(start, stop)] are the byte offsets of the token in [text], with
    [stop = start + 1]. Tokens are returned in input order, and the empty
    string yields the empty list.

    The split is per byte, not per Unicode character: a multi-byte UTF-8
    sequence produces one token for each of its bytes. *)
let tokenize () text =
  (* [List.init] builds the result in order, so no mutable accumulator, offset
     counter or final reversal is needed. It returns the empty list for a
     length of zero, which covers the empty-string case. *)
  List.init (String.length text) (fun i ->
      let c = text.[i] in
      (Char.code c, String.make 1 c, (i, i + 1)))

(** [token_to_id () token] maps a one-byte token to its byte code.

    Returns [Some code] when [token] is exactly one byte long, and [None] for
    the empty string or any longer string. *)
let token_to_id () token =
  match String.length token with
  | 1 -> Some (Char.code (String.get token 0))
  | _ -> None

(** [id_to_token () id] maps a byte code back to its one-character string.

    Returns [Some s], with [s] the single-byte string whose byte has code [id],
    when [id] lies in the range 0 to 255 inclusive; returns [None] otherwise. *)
let id_to_token () id =
  let lowest = 0 and highest = 255 in
  (* Reject out-of-range ids first so that [Char.chr] can never raise. *)
  if id < lowest || id > highest then None
  else
    let byte = Char.chr id in
    Some (String.make 1 byte)

(** [get_vocab ()] always returns the empty list; no vocabulary entries are
    enumerated here.
    NOTE(review): [get_vocab_size] reports 256 while this list is empty.
    Presumably callers are expected to rely on [token_to_id] and [id_to_token]
    instead of an explicit table -- confirm that no caller expects the two to
    agree. *)
let get_vocab () = []
(** [get_vocab_size ()] is always [256]: one id for each possible byte value,
    the same 0 to 255 range that [id_to_token] accepts. *)
let get_vocab_size () = 256 (* every byte value 0-255, not only 7-bit ASCII *)
(** [save () ~folder ()] ignores [folder] and returns the empty list; this
    function itself writes nothing.
    NOTE(review): presumably the result is meant to be the list of files
    written, empty here because the tokenizer has no state to persist --
    confirm against the caller. *)
let save () ~folder:_ () = []