package fehu

  1. Overview
  2. Docs

Module Fehu_algorithms.Reinforce

Reinforce algorithm implementation.

REINFORCE (Monte Carlo Policy Gradient) is a classic policy gradient method that collects complete episodes and updates the policy using Monte Carlo return estimates. See Reinforce for detailed documentation.

Monte Carlo policy gradient (REINFORCE) training API.

type config = {
  learning_rate : float;
  gamma : float;
  use_baseline : bool;
  reward_scale : float;
  entropy_coef : float;
  max_episode_steps : int;
}
val default_config : config
type params = Kaun.Ptree.t
type metrics = {
  episode_return : float;
  episode_length : int;
  episode_won : bool;
  stage_desc : string;
  avg_entropy : float;
  avg_log_prob : float;
  adv_mean : float;
  adv_std : float;
  value_loss : float option;
  total_steps : int;
  total_episodes : int;
}
type state
val init : ?baseline_network:Kaun.module_ -> env:((float, Bigarray.float32_elt) Rune.t, (int32, Bigarray.int32_elt) Rune.t, 'render) Fehu.Env.t -> policy_network:Kaun.module_ -> rng:Rune.Rng.key -> config:config -> unit -> params * state
val step : env:((float, Bigarray.float32_elt) Rune.t, (int32, Bigarray.int32_elt) Rune.t, 'render) Fehu.Env.t -> params:params -> state:state -> params * state
val metrics : state -> metrics
val train : ?baseline_network:Kaun.module_ -> env:((float, Bigarray.float32_elt) Rune.t, (int32, Bigarray.int32_elt) Rune.t, 'render) Fehu.Env.t -> policy_network:Kaun.module_ -> rng:Rune.Rng.key -> config:config -> total_timesteps:int -> ?callback:(metrics -> [ `Continue | `Stop ]) -> unit -> params * state
val save : path:string -> params:params -> state:state -> unit
val load : path:string -> env:((float, Bigarray.float32_elt) Rune.t, (int32, Bigarray.int32_elt) Rune.t, 'render) Fehu.Env.t -> policy_network:Kaun.module_ -> ?baseline_network:Kaun.module_ -> config:config -> unit -> (params * state, string) result