package saga

  1. Overview
  2. Docs

Module Bpe.Builder

type builder
val create : unit -> builder

Create a new builder with default settings

val vocab_and_merges : builder -> vocab -> merges -> builder

Set vocabulary and merges

val cache_capacity : builder -> int -> builder

Set cache capacity (0 to disable)

val dropout : builder -> float -> builder

Set dropout probability (0.0 to 1.0)

val unk_token : builder -> string -> builder

Set unknown token

val continuing_subword_prefix : builder -> string -> builder

Set prefix for continuing subwords

val end_of_word_suffix : builder -> string -> builder

Set suffix for end-of-word tokens

val fuse_unk : builder -> bool -> builder

Set whether to fuse consecutive unknown tokens

val byte_fallback : builder -> bool -> builder

Enable or disable byte-level fallback for unknown characters

val ignore_merges : builder -> bool -> builder

Set whether to ignore merges and output a word directly when it is already in the vocabulary

val build : builder -> t

Build the BPE model