package saga
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>
On This Page
Text processing and NLP extensions for Nx
Install
dune-project
Dependency
Authors
Maintainers
Sources
raven-1.0.0.alpha1.tbz
sha256=8e277ed56615d388bc69c4333e43d1acd112b5f2d5d352e2453aef223ff59867
sha512=369eda6df6b84b08f92c8957954d107058fb8d3d8374082e074b56f3a139351b3ae6e3a99f2d4a4a2930dd950fd609593467e502368a13ad6217b571382da28c
doc/saga.tokenizers/Saga_tokenizers/Models/index.html
Module Saga_tokenizers.ModelsSource
Tokenization models module.
Tokenization result
Source
type bpe_model = {
  vocab : (string, int) Hashtbl.t;
  merges : (string * string) list;
  cache_capacity : int;
  dropout : float option;
  unk_token : string option;
  continuing_subword_prefix : string option;
  end_of_word_suffix : string option;
  fuse_unk : bool;
  byte_fallback : bool;
}
Model configurations
Source
type t =
  | BPE of bpe_model
  | WordPiece of wordpiece_model
  | WordLevel of wordlevel_model
  | Unigram of unigram_model
Main model type
Constructors
Source
val bpe :
?vocab:(string * int) list ->
?merges:(string * string) list ->
?cache_capacity:int ->
?dropout:float ->
?unk_token:string ->
?continuing_subword_prefix:string ->
?end_of_word_suffix:string ->
?fuse_unk:bool ->
?byte_fallback:bool ->
?ignore_merges:bool ->
unit ->
t
Create a BPE model
Source
val wordpiece :
?vocab:(string * int) list ->
?unk_token:string ->
?continuing_subword_prefix:string ->
?max_input_chars_per_word:int ->
unit ->
t
Create a WordPiece model
Create a WordLevel model
Source
val unigram :
?vocab:(string * float) list ->
?unk_token:string ->
?byte_fallback:bool ->
?max_piece_length:int ->
?n_sub_iterations:int ->
?shrinking_factor:float ->
unit ->
t
Create a Unigram model
Operations
Add tokens to the model's vocabulary. Returns number of tokens added.
Serialization
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>
On This Page