package fehu
Reinforcement learning framework for OCaml
Install
dune-project
Dependency
Authors
Maintainers
Sources
raven-1.0.0.alpha1.tbz
sha256=8e277ed56615d388bc69c4333e43d1acd112b5f2d5d352e2453aef223ff59867
sha512=369eda6df6b84b08f92c8957954d107058fb8d3d8374082e074b56f3a139351b3ae6e3a99f2d4a4a2930dd950fd609593467e502368a13ad6217b571382da28c
doc/src/fehu.envs/random_walk.ml.html
Source file random_walk.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
open Fehu

(* One-dimensional random walk: each step moves the position one unit left or
   right along a line clamped to the interval from -10 to 10. *)

type observation = (float, Rune.float32_elt) Rune.t
type action = (int32, Rune.int32_elt) Rune.t
type render = string

(* Mutable record holding the walker position and the number of steps taken
   since the last reset. *)
type state = { mutable position : float; mutable steps : int }

(* Observations are a single float bounded by -10 and 10. *)
let observation_space = Space.Box.create ~low:[| -10.0 |] ~high:[| 10.0 |]

(* Discrete space of size 2; [step] maps 0 to a left move and any other value
   to a right move. *)
let action_space = Space.Discrete.create 2

let metadata =
  Metadata.default
  |> Metadata.add_render_mode "ansi"
  |> Metadata.with_description
       (Some "Simple one-dimensional random walk environment")
  |> Metadata.add_author "Fehu New"
  |> Metadata.with_version (Some "0.1.0")

(* Put the walker back at the origin with a zero step count and return the
   initial observation paired with an empty info value. The environment
   handle and the reset options are ignored. *)
let reset _env ?options:_ () state =
  state.position <- 0.0;
  state.steps <- 0;
  let initial = Rune.create Rune.float32 [| 1 |] [| 0.0 |] in
  (initial, Info.empty)

(* Advance the walk by one move.

   The action tensor is reshaped to a single element and read as an int:
   0 moves one unit left, anything else moves one unit right. The new position
   is clamped to the interval from -10 to 10. The episode terminates once the
   absolute position reaches 10 and is truncated once 200 steps have been
   taken. The reward is the negated absolute position, and the info value
   carries the step count under the key steps. *)
let step _env action state =
  let action_value =
    let cells : Int32.t array = Rune.to_array (Rune.reshape [| 1 |] action) in
    Int32.to_int cells.(0)
  in
  let delta = match action_value with 0 -> -1.0 | _ -> 1.0 in
  (* Clamp from below first, then from above, as two pipeline stages. *)
  state.position <-
    (state.position +. delta |> Float.max (-10.0) |> Float.min 10.0);
  state.steps <- succ state.steps;
  let distance = Float.abs state.position in
  let terminated = distance >= 10.0 in
  let truncated = state.steps >= 200 in
  let reward = -.distance in
  let info = Info.empty |> Info.set "steps" (Info.int state.steps) in
  let observation = Rune.create Rune.float32 [| 1 |] [| state.position |] in
  Env.transition ~observation ~reward ~terminated ~truncated ~info ()

(* Render the walk as text: a header line with the signed position followed
   by a 21-cell track of dots in which the walker is drawn as an o. *)
let render state =
  (* Shift the position by 10 to get a cell index, then force it into the
     range 0 to 20 so it always addresses one of the 21 cells. *)
  let cell = (state.position +. 10.) |> int_of_float |> min 20 |> max 0 in
  let track = String.init 21 (fun i -> if i = cell then 'o' else '.') in
  Format.asprintf "Position: %+.2f@.|%s|" state.position track

(* Build a RandomWalk-v0 environment. A fresh state record is allocated per
   call and captured by the reset, step and render callbacks; close does
   nothing. *)
let make ~rng () =
  let walk = { position = 0.0; steps = 0 } in
  let on_step env action = step env action walk in
  let on_render _ = Some (render walk) in
  let on_close _ = () in
  Env.create ~id:"RandomWalk-v0" ~metadata ~rng ~observation_space
    ~action_space
    ~reset:(fun env ?options () -> reset env ?options () walk)
    ~step:on_step ~render:on_render ~close:on_close ()
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>