package saga

  1. Overview
  2. Docs
Text processing and NLP extensions for Nx

Install

dune-project
 Dependency

Authors

Maintainers

Sources

raven-1.0.0.alpha1.tbz
sha256=8e277ed56615d388bc69c4333e43d1acd112b5f2d5d352e2453aef223ff59867
sha512=369eda6df6b84b08f92c8957954d107058fb8d3d8374082e074b56f3a139351b3ae6e3a99f2d4a4a2930dd950fd609593467e502368a13ad6217b571382da28c

doc/saga.tokenizers/Saga_tokenizers/Bpe/Builder/index.html

Module Bpe.Builder

type builder
val create : unit -> builder

Create a new builder with default settings

val vocab_and_merges : builder -> vocab -> merges -> builder

Set vocabulary and merges

val cache_capacity : builder -> int -> builder

Set cache capacity (0 to disable)

val dropout : builder -> float -> builder

Set dropout probability (0.0 to 1.0)

val unk_token : builder -> string -> builder

Set unknown token

val continuing_subword_prefix : builder -> string -> builder

Set prefix for continuing subwords

val end_of_word_suffix : builder -> string -> builder

Set suffix for end-of-word tokens

val fuse_unk : builder -> bool -> builder

Set whether to fuse consecutive unknown tokens

val byte_fallback : builder -> bool -> builder

Set whether to use byte-level fallback for unknown characters

val ignore_merges : builder -> bool -> builder

Set whether to ignore merges and output a word directly if it is already in the vocabulary

val build : builder -> t

Build the BPE model