package saga

  1. Overview
  2. Docs

Module Wordpiece.Builder

type builder
val create : unit -> builder

Create a new builder with default settings.

val files : builder -> string -> builder

Set the path of the vocabulary file.

val vocab : builder -> vocab -> builder

Set the vocabulary directly.

val unk_token : builder -> string -> builder

Set the unknown token (default: "UNK").

val continuing_subword_prefix : builder -> string -> builder

Set the prefix for continuing subwords (default: "##").

val max_input_chars_per_word : builder -> int -> builder

Set the maximum number of input characters per word (default: 100).

val build : builder -> t

Build the WordPiece model.