package saga

  1. Overview
  2. Docs

Module Wordpiece.Trainer

type trainer
type trainer_config = {
  min_frequency : int;
  vocab_size : int;
  show_progress : bool;
  special_tokens : string list;
  limit_alphabet : int option;
  initial_alphabet : char list;
  continuing_subword_prefix : string;
  end_of_word_suffix : string option;
}
val default_config : trainer_config

Default trainer configuration

Create a new trainer

val feed : trainer -> string list -> unit

Feed training data to the trainer

val train : trainer -> t -> string list

Train the model and return special tokens