Library
Module
Module type
Parameter
Class
Class type
module Trie : Bap.Std.Trie.V2.S with type key = Corpus.key
include V1.S with type key = Corpus.key with type corpus = Corpus.t
include Bin_prot.Binable.S with type t := t
val bin_size_t : t Bin_prot.Size.sizer
val bin_write_t : t Bin_prot.Write.writer
val bin_read_t : t Bin_prot.Read.reader
val __bin_read_t__ : (int -> t) Bin_prot.Read.reader
val bin_writer_t : t Bin_prot.Type_class.writer
val bin_reader_t : t Bin_prot.Type_class.reader
val bin_t : t Bin_prot.Type_class.t
type key = Corpus.key
type corpus = Corpus.t
val create : unit -> t
create ()
creates an empty instance of the byteweight decider.
train decider ~max_length test corpus
trains the decider on the specified corpus.
The test function classifies extracted substrings.
The max_length parameter bounds the maximum length of substrings.
val length : t -> int
length decider
the total number of distinct substrings known to the decider.
next t ~length ~threshold data begin
the next positive chunk.
Returns the offset, greater than begin, of the next longest substring of up to the given length for which h1 / (h0 + h1) > threshold.
This is a specialization of the next_if
function from the extended V1.V2.S
interface.
val pp : Stdlib.Format.formatter -> t -> unit
pp ppf decider
prints all chunks known to the decider.
type token = Trie.token
next_if t ~length ~f data begin
the next chunk that satisfies f.
Finds the next offset, greater than begin, of a string of the given length for which a substring s with length n and statistics stats was observed, such that f s n stats is true.