package saga

  1. Overview
  2. Docs
Text processing and NLP extensions for Nx

Install

dune-project
 Dependency

Authors

Maintainers

Sources

raven-1.0.0.alpha1.tbz
sha256=8e277ed56615d388bc69c4333e43d1acd112b5f2d5d352e2453aef223ff59867
sha512=369eda6df6b84b08f92c8957954d107058fb8d3d8374082e074b56f3a139351b3ae6e3a99f2d4a4a2930dd950fd609593467e502368a13ad6217b571382da28c

doc/saga.tokenizers/Saga_tokenizers/Wordpiece/Builder/index.html

Module Wordpiece.Builder

type builder
val create : unit -> builder

Create a new builder with default settings.

val files : builder -> string -> builder

Set the vocabulary file path.

val vocab : builder -> vocab -> builder

Set the vocabulary directly.

val unk_token : builder -> string -> builder

Set the unknown token (default: "UNK").

val continuing_subword_prefix : builder -> string -> builder

Set the prefix for continuing subwords (default: "##").

val max_input_chars_per_word : builder -> int -> builder

Set the maximum number of input characters per word (default: 100).

val build : builder -> t

Build the WordPiece model.