package fehu
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>
Reinforcement learning for OCaml
Install
dune-project
Dependency
Authors
Maintainers
Sources
raven-1.0.0.alpha3.tbz
sha256=96d35ce03dfbebd2313657273e24c2e2d20f9e6c7825b8518b69bd1d6ed5870f
sha512=90c5053731d4108f37c19430e45456063e872b04b8a1bbad064c356e1b18e69222de8bfcf4ec14757e71f18164ec6e4630ba770dbcb1291665de5418827d1465
doc/src/fehu.envs/random_walk.ml.html
Source file random_walk.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 The Raven authors. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

open Fehu

(* Observations are float32 Nx tensors, actions are int32 Nx tensors, and
   rendered frames are plain strings. *)
type obs = (float, Nx.float32_elt) Nx.t
type act = (int32, Nx.int32_elt) Nx.t
type render = string

(* Distance moved by a single step. *)
let step_size = 1.0

(* Positions are clamped to the closed interval from -max_position to
   +max_position. *)
let max_position = 10.0

(* Step count at which an episode that has not terminated is truncated. *)
let max_steps = 200

(* Box space with a single dimension bounded by -max_position and
   +max_position. *)
let observation_space =
  let high = [| max_position |] in
  let low = [| -.max_position |] in
  Space.Box.create ~low ~high

(* Discrete space created with 2 actions; step treats action 0 as a move in
   the negative direction and any other value as a move in the positive
   direction. *)
let action_space = Space.Discrete.create 2

(* Wraps a position into a float32 tensor created with shape [| 1 |]. *)
let make_obs position = Nx.create Nx.float32 [| 1 |] [| position |]

(* Formats a position as two lines of text: a signed two-decimal readout,
   then a 21-cell track of dots between vertical bars with the letter o
   marking the current cell. *)
let render_ansi position =
  (* Shift by max_position, truncate to an integer cell, then keep the index
     inside the track (0 to 20). *)
  let cell = int_of_float (position +. max_position) |> min 20 |> max 0 in
  let track = Bytes.init 21 (fun i -> if i = cell then 'o' else '.') in
  Printf.sprintf "Position: %+.2f\n|%s|" position (Bytes.to_string track)

(* Builds the RandomWalk-v0 environment. The position and the step counter
   live in mutable references captured by the reset, step and render
   closures handed to Env.create. *)
let make ?render_mode () =
  let steps = ref 0 in
  let position = ref 0.0 in
  (* Puts the walker back at the origin and zeroes the step counter. *)
  let reset _env ?options:_ () =
    steps := 0;
    position := 0.0;
    (make_obs 0.0, Info.empty)
  in
  (* Moves one step left or right, clamps to the bounds, and reports the
     outcome of the transition. *)
  let step _env action =
    let delta =
      match Space.Discrete.to_int action with
      | 0 -> -.step_size
      | _ -> step_size
    in
    let moved = !position +. delta in
    let bounded_below = Float.max (-.max_position) moved in
    let clamped = Float.min max_position bounded_below in
    position := clamped;
    incr steps;
    let distance = Float.abs clamped in
    (* Reaching either bound ends the episode; otherwise the step budget can
       truncate it. *)
    let terminated = distance >= max_position in
    let truncated = (not terminated) && !steps >= max_steps in
    (* The reward is the negated distance from the origin. *)
    let reward = -.distance in
    let info = Info.empty |> Info.set "steps" (Info.int !steps) in
    let observation = make_obs clamped in
    Env.step_result ~observation ~reward ~terminated ~truncated ~info ()
  in
  let render () = Some (render_ansi !position) in
  Env.create ?render_mode ~id:"RandomWalk-v0" ~render_modes:[ "ansi" ]
    ~observation_space ~action_space ~reset ~step ~render ()
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>