package fehu
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>
Reinforcement learning framework for OCaml
Install
dune-project
Dependency
Authors
Maintainers
Sources
raven-1.0.0.alpha2.tbz
sha256=93abc49d075a1754442ccf495645bc4fdc83e4c66391ec8aca8fa15d2b4f44d2
sha512=5eb958c51f30ae46abded4c96f48d1825f79c7ce03f975f9a6237cdfed0d62c0b4a0774296694def391573d849d1f869919c49008acffca95946b818ad325f6f
doc/fehu.algorithms/Fehu_algorithms/Reinforce/index.html
Module Fehu_algorithms.Reinforce
Source
Reinforce algorithm implementation.
REINFORCE (Monte Carlo Policy Gradient) is a classic policy gradient method that collects complete episodes and updates the policy using Monte Carlo return estimates. See Reinforce for detailed documentation.
Monte Carlo policy gradient (REINFORCE) training API.
Source
type config = {
  learning_rate : float;
  gamma : float;
  use_baseline : bool;
  reward_scale : float;
  entropy_coef : float;
  max_episode_steps : int;
}
Source
type metrics = {
  episode_return : float;
  episode_length : int;
  episode_won : bool;
  stage_desc : string;
  avg_entropy : float;
  avg_log_prob : float;
  adv_mean : float;
  adv_std : float;
  value_loss : float option;
  total_steps : int;
  total_episodes : int;
}
Source
val init :
?baseline_network:Kaun.module_ ->
env:
((float, Bigarray.float32_elt) Rune.t,
(int32, Bigarray.int32_elt) Rune.t,
'render)
Fehu.Env.t ->
policy_network:Kaun.module_ ->
rng:Rune.Rng.key ->
config:config ->
unit ->
params * state
Source
val step :
env:
((float, Bigarray.float32_elt) Rune.t,
(int32, Bigarray.int32_elt) Rune.t,
'render)
Fehu.Env.t ->
params:params ->
state:state ->
params * state
Source
val train :
?baseline_network:Kaun.module_ ->
env:
((float, Bigarray.float32_elt) Rune.t,
(int32, Bigarray.int32_elt) Rune.t,
'render)
Fehu.Env.t ->
policy_network:Kaun.module_ ->
rng:Rune.Rng.key ->
config:config ->
total_timesteps:int ->
?callback:(metrics -> [ `Continue | `Stop ]) ->
unit ->
params * state
Source
val load :
path:string ->
env:
((float, Bigarray.float32_elt) Rune.t,
(int32, Bigarray.int32_elt) Rune.t,
'render)
Fehu.Env.t ->
policy_network:Kaun.module_ ->
?baseline_network:Kaun.module_ ->
config:config ->
unit ->
(params * state, string) result
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>