package owl

  1. Overview
  2. Docs
Legend:
Library
Module
Module type
Parameter
Class
Class type

NLP: TFIDF module

module Vec = Owl_dense_vector_d
type tf_typ =
  | Binary
  | Count
  | Frequency
  | Log_norm
type df_typ =
  | Unary
  | Idf
  | Idf_Smooth
type t = {
  mutable uri : string;
  mutable tf_typ : tf_typ;
  mutable df_typ : df_typ;
  mutable offset : int array;
  mutable doc_freq : float array;
  mutable corpus : Owl_nlp_corpus.t;
  mutable handle : Stdlib.in_channel option;
}
val term_freq : tf_typ -> float -> float -> float
val doc_freq : df_typ -> float -> float -> float
val tf_typ_string : tf_typ -> string
val df_typ_string : df_typ -> string
val create : tf_typ -> df_typ -> Owl_nlp_corpus.t -> t
val get_uri : t -> string
val get_corpus : t -> Owl_nlp_corpus.t
val length : t -> int
val vocab_len : t -> int
val get_handle : t -> Stdlib.in_channel
val doc_count_of : t -> string -> float
val doc_count : Owl_nlp_vocabulary.t -> string -> float array * int
val term_count : ('a, float) Stdlib.Hashtbl.t -> 'a array -> unit
val normalise : ('a * float) array -> ('a * float) array
val _build_with : bool -> bool -> (float -> float -> float) -> (float -> float -> float) -> t -> unit
val build : ?norm:bool -> ?sort:bool -> ?tf:tf_typ -> ?df:df_typ -> Owl_nlp_corpus.t -> t
val next : t -> (int * float) array
val next_batch : ?size:int -> t -> (int * float) array array
val iteri : (int -> 'a -> 'b) -> t -> unit
val mapi : (int -> 'a -> 'b) -> t -> 'b array
val get : t -> int -> (int * float) array
val reset_iterators : t -> unit
val apply : t -> string -> (int * float) array
val save : t -> string -> unit
val load : string -> t
val to_string : t -> string
val print : t -> unit
val density : t -> float
val doc_to_vec : t -> (int * Vec.elt) array -> Vec.vec
val all_pairwise_distance : Owl_nlp_similarity.t -> t -> ('a * float) array -> (int * float) array
val nearest : ?typ:Owl_nlp_similarity.t -> t -> ('a * float) array -> int -> (int * float) array
OCaml

Innovation. Community. Security.