orf 1.0.0 · OCaml Package

(** Maps keyed by [int]; alias for [BatMap.Int]. *)
module IntMap = BatMap.Int

(** Sets of [int]; alias for [BatSet.Int]. *)
module IntSet = BatSet.Int

(** Features of one sample: an [IntMap.t] binding [int] keys to [int] values.
    NOTE(review): presumably feature index -> feature value; confirm. *)
type features = int IntMap.t

(** A class label, represented as an [int]. *)
type class_label = int

(** A sample: its features paired with its class label. *)
type sample = features * class_label

(** Metric selector; it is the third argument of [train].
    NOTE(review): presumably the criterion used to score candidate splits
    (Gini impurity, Shannon entropy, Matthews Correlation Coefficient);
    confirm against the implementation. *)
type metric =

| Gini
| Shannon
| MCC

(** A trained Random Forests model. The type is abstract: values are
    obtained from [train] or [restore]. *)
type forest

A trained Random Forests model.

(** A quantity given either as an [int] or as a [float]. Used for the
    [max_features] and [max_samples] arguments of [train] and consumed by
    [ratio_to_int].
    NOTE(review): presumably [Int] is an absolute count and [Float] a ratio
    of some total; confirm against [ratio_to_int]. *)
type int_or_float =

| Int of int
| Float of float

(** [train ncores rng metric ntrees max_features card_features max_samples
    min_node_size training_set] returns a trained [forest].

    All arguments are positional, in this order:
    - [ncores] : [int]
    - [rng] : [Random.State.t]
    - [metric] : [metric]
    - [ntrees] : [int]
    - [max_features] : [int_or_float]
    - [card_features] : [int]
    - [max_samples] : [int_or_float]
    - [min_node_size] : [int]
    - [training_set] : [sample array]

    NOTE(review): the exact meaning of each parameter is only suggested by
    its name here (e.g. [ncores] presumably the number of cores to use,
    [ntrees] the number of trees); confirm against the implementation. *)
val train : 
  int ->
  Random.State.t ->
  metric ->
  int ->
  int_or_float ->
  int ->
  int_or_float ->
  int ->
  sample array ->
  forest

train ncores rng metric ntrees max_features card_features max_samples min_node_size training_set

(** [predict_one ncores rng trained_forest sample] returns
    [(pred_label, pred_proba)] for a single sample.
    Note that the argument is a full [sample], i.e. features paired with a
    class label.
    NOTE(review): how the label carried by [sample] is used, and the exact
    definition of [pred_proba], are not visible here; confirm. *)
val predict_one : 
  int ->
  Random.State.t ->
  forest ->
  sample ->
  class_label * float

(pred_label, pred_proba) = predict_one ncores rng trained_forest sample

(** [predict_one_margin ncores rng trained_forest sample] returns
    [(pred_label, pred_proba, pred_margin)] for a single sample.
    NOTE(review): the definition of [pred_margin] is not visible here;
    confirm against the implementation. *)
val predict_one_margin : 
  int ->
  Random.State.t ->
  forest ->
  sample ->
  class_label * float * float

(pred_label, pred_proba, pred_margin) = predict_one_margin ncores rng trained_forest sample

(** Like [predict_one], but for an array of samples: returns an array of
    [(class_label, float)] pairs. Arguments mirror [predict_one], with a
    [sample array] in place of the single sample. *)
val predict_many : 
  int ->
  Random.State.t ->
  forest ->
  sample array ->
  (class_label * float) array

like predict_one but for an array of samples

(** Like [predict_one_margin], but for an array of samples: returns an array
    of [(class_label, float, float)] triples. Arguments mirror
    [predict_one_margin], with a [sample array] in place of the single
    sample. *)
val predict_many_margin : 
  int ->
  Random.State.t ->
  forest ->
  sample array ->
  (class_label * float * float) array

like predict_one_margin but for an array of samples

(** [predict_OOB rng forest training_set] uses a trained forest to predict
    on the Out Of Bag (OOB) training set of each tree and returns
    [truth_preds].
    [training_set] must be given in the same order as when the model was
    trained. The result can be passed to [mcc] or [accuracy] to estimate
    model performance without a separate test set.
    NOTE(review): each pair is presumably (true label, predicted label), as
    the name [truth_preds] suggests; confirm the component order. *)
val predict_OOB : 
  Random.State.t ->
  forest ->
  sample array ->
  (class_label * class_label) array

Use a trained forest to predict on the Out Of Bag (OOB) training set of each tree. The training_set must be provided in the same order as when the model was trained. Can be used to get a reliable model performance estimate, even if you don't have a left-out test set. Usage: truth_preds = predict_OOB rng forest training_set

(** [mcc target_class_label truth_preds] is the Matthews Correlation
    Coefficient (MCC) for [target_class_label].
    NOTE(review): [truth_preds] presumably has the shape returned by
    [predict_OOB]; confirm the pair order expected here. *)
val mcc : class_label -> (class_label * class_label) array -> float

Matthews Correlation Coefficient (MCC). mcc target_class_label truth_preds

(** [accuracy truth_preds] is the percentage of correct predictions.
    NOTE(review): whether the result is scaled to 0-100 or 0-1 is not
    visible here; confirm. *)
val accuracy : (class_label * class_label) array -> float

Percentage of correct predictions. Usage: accuracy truth_preds

(** [roc_auc target_class_label preds true_labels] is the ROC AUC for
    [target_class_label].
    [preds] has the element type returned by [predict_many].
    NOTE(review): presumably [preds] and [true_labels] are index-aligned
    and of equal length; confirm. *)
val roc_auc : 
  class_label ->
  (class_label * float) array ->
  class_label array ->
  float

ROC AUC. Usage: roc_auc target_class_label preds true_labels

(** [drop_OOB forest] returns the model with its OOB samples forgotten,
    which reduces model size. *)
val drop_OOB : forest -> forest

make trained model forget OOB samples (reduce model size)

(** A file name, as a plain [string]; used by [save] and [restore]. *)
type filename = string

(** [save filename forest] saves the model to [filename] using Marshal.
    OOB samples are dropped prior to saving the model. *)
val save : filename -> forest -> unit

Save model to file (Marshal). OOB samples are dropped prior to saving the model.

(** [restore filename] restores a model from [filename] using Marshal.
    NOTE(review): Marshal input is not type-checked, so the file should
    come from [save] in a compatible build and from a trusted source;
    confirm whether the implementation adds any validation. *)
val restore : filename -> forest

Restore model from file (Marshal)

The following are needed to implement RFR

(** Exposed because it is needed to implement RFR.
    Takes an array of (features, label) pairs, with any label type, and
    returns a list of [(int, IntSet.t)] pairs.
    NOTE(review): presumably each pair is a feature index together with the
    set of values observed for it, restricted to features that take more
    than one value; inferred from the name and type only, confirm. *)
val collect_non_constant_features : 
  (int IntMap.t * 'a) array ->
  (int * IntSet.t) list

(** Exposed because it is needed to implement RFR.
    Splits an array of (features, label) pairs into two arrays of the same
    element type.
    NOTE(review): the two [int] arguments are presumably a feature index
    and a threshold value, in that order; not visible here, confirm. *)
val partition_samples : 
  int ->
  int ->
  (int IntMap.t * 'a) array ->
  (int IntMap.t * 'a) array * (int IntMap.t * 'a) array

(** Exposed because it is needed to implement RFR.
    Takes a function scoring an array as a [float], then two arrays, and
    returns a [float].
    NOTE(review): presumably the combined cost of a split whose two sides
    are the given arrays; how the two scores are combined is not visible
    here, confirm. *)
val cost_function : ('a array -> float) -> 'a array -> 'a array -> float

(** Exposed because it is needed to implement RFR.
    Returns one element of the given list of candidates; each candidate
    starts with a [float].
    NOTE(review): presumably the leading [float] is a cost, the candidate
    with minimal cost is returned, and [rng] breaks ties; behavior on an
    empty list is not visible here, confirm. *)
val choose_min_cost : 
  Random.State.t ->
  (float * 'b * 'c * ('d * 'e)) list ->
  float * 'b * 'c * ('d * 'e)

(** Exposed because it is needed to implement RFR.
    Maps a function over an array and returns the array of results.
    NOTE(review): presumably the [int] is the number of cores and the extra
    argument of the result element type is an initial value for the output
    array; inferred from the name and type only, confirm. *)
val array_parmap : int -> ('a -> 'b) -> 'a array -> 'b -> 'b array

(** Exposed because it is needed to implement RFR.
    Converts an [int_or_float] into an [int].
    NOTE(review): the roles of the two [int] arguments and of the [string]
    argument are not visible here (possibly bounds and a parameter name
    used in error messages); confirm against the implementation. *)
val ratio_to_int : int -> int -> string -> int_or_float -> int

package orf