package orf

  1. Overview
  2. Docs

Module Orf.RFC

Sourcemodule IntMap = BatMap.Int
Sourcemodule IntSet = BatSet.Int
Sourcetype features = int IntMap.t
Sourcetype class_label = int
Sourcetype sample = features * class_label
Sourcetype metric =
  | Gini
  | Shannon
  | MCC
Sourcetype forest

A trained Random Forests model.

Sourcetype int_or_float =
  | Int of int
  | Float of float
Sourceval train : int -> Random.State.t -> metric -> int -> int_or_float -> int -> int_or_float -> int -> sample array -> forest

train ncores rng metric ntrees max_features card_features max_samples min_node_size training_set

Sourceval predict_one : int -> Random.State.t -> forest -> sample -> class_label * float

(pred_label, pred_proba) = predict_one ncores rng trained_forest sample

Sourceval predict_one_margin : int -> Random.State.t -> forest -> sample -> class_label * float * float

(pred_label, pred_proba, pred_margin) = predict_one_margin ncores rng trained_forest sample

Sourceval predict_many : int -> Random.State.t -> forest -> sample array -> (class_label * float) array

like predict_one but for an array of samples

Sourceval predict_many_margin : int -> Random.State.t -> forest -> sample array -> (class_label * float * float) array

like predict_one_margin but for an array of samples

Sourceval predict_OOB : Random.State.t -> forest -> sample array -> (class_label * class_label) array

Use a trained forest to predict on the Out Of Bag (OOB) training set of each tree. The training_set must be provided in the same order as when the model was trained. Can be used to get a reliable model performance estimate, even if you don't have a left-out test set. Usage: truth_preds = predict_OOB rng forest training_set

Sourceval mcc : class_label -> (class_label * class_label) array -> float

Matthews Correlation Coefficient (MCC). mcc target_class_label truth_preds

Sourceval accuracy : (class_label * class_label) array -> float

Percentage of correct predictions. Usage: accuracy truth_preds

Sourceval roc_auc : class_label -> (class_label * float) array -> class_label array -> float

ROC AUC. Usage: roc_auc target_class_label preds true_labels

Sourceval drop_OOB : forest -> forest

make trained model forget OOB samples (reduce model size)

Sourcetype filename = string
Sourceval save : filename -> forest -> unit

Save model to file (Marshal). OOB samples are dropped prior to saving the model.

Sourceval restore : filename -> forest

Restore model from file (Marshal)

The following are needed to implement RFR

Sourceval collect_non_constant_features : (int IntMap.t * 'a) array -> (int * IntSet.t) list
Sourceval partition_samples : int -> int -> (int IntMap.t * 'a) array -> (int IntMap.t * 'a) array * (int IntMap.t * 'a) array
Sourceval cost_function : ('a array -> float) -> 'a array -> 'a array -> float
Sourceval choose_min_cost : Random.State.t -> (float * 'b * 'c * ('d * 'e)) list -> float * 'b * 'c * ('d * 'e)
Sourceval array_parmap : int -> ('a -> 'b) -> 'a array -> 'b -> 'b array
Sourceval ratio_to_int : int -> int -> string -> int_or_float -> int