package fehu
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>
Reinforcement learning for OCaml
Install
dune-project
Dependency
Authors
Maintainers
Sources
raven-1.0.0.alpha3.tbz
sha256=96d35ce03dfbebd2313657273e24c2e2d20f9e6c7825b8518b69bd1d6ed5870f
sha512=90c5053731d4108f37c19430e45456063e872b04b8a1bbad064c356e1b18e69222de8bfcf4ec14757e71f18164ec6e4630ba770dbcb1291665de5418827d1465
doc/src/fehu.envs/mountain_car.ml.html
Source file mountain_car.ml
(*---------------------------------------------------------------------------
  Copyright (c) 2026 The Raven authors. All rights reserved.
  SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

open Fehu

(* Observations are float32 Nx arrays, actions are int32 Nx arrays, and
   rendered frames are plain strings. *)
type obs = (float, Nx.float32_elt) Nx.t
type act = (int32, Nx.int32_elt) Nx.t
type render = string

(* Physics constants matching Gymnasium MountainCar-v0 *)
let min_position = -1.2
let max_position = 0.6
let max_speed = 0.07
let goal_position = 0.5
let goal_velocity = 0.0
let force = 0.001
let gravity = 0.0025
let max_steps = 200

(* Box over [position; velocity], bounded by the constants above. *)
let observation_space =
  Space.Box.create
    ~low:[| min_position; -.max_speed |]
    ~high:[| max_position; max_speed |]

(* Three discrete actions; [step] maps action [a] to the direction [a - 1]. *)
let action_space = Space.Discrete.create 3

(* Packs a position/velocity pair into a 2-element float32 Nx array. *)
let make_obs position velocity =
  Nx.create Nx.float32 [| 2 |] [| position; velocity |]

(* Builds a "MountainCar-v0" environment. The car state (position, velocity,
   step counter) lives in refs private to the returned environment and is
   shared by the [reset], [step] and [render] closures below. *)
let make ?render_mode () =
  let position = ref 0.0 and velocity = ref 0.0 and steps = ref 0 in

  (* Starts a new episode: position is -0.6 plus 0.2 times one sample drawn
     with [Nx.rand] (presumably uniform in [0, 1) -- confirm against Nx),
     velocity and step counter are zeroed. Returns the initial observation
     together with an empty info record. *)
  let reset _env ?options:_ () =
    let sample = (Nx.to_array (Nx.rand Nx.float32 [| 1 |])).(0) in
    position := -0.6 +. (sample *. 0.2);
    velocity := 0.0;
    steps := 0;
    (make_obs !position !velocity, Info.empty)
  in

  (* Advances the simulation by one step for the given discrete action. *)
  let step _env action =
    (* Restricts [x] to the closed interval [lo, hi]. *)
    let clamp lo hi x = Float.max lo (Float.min x hi) in
    (* Action index minus one gives the push direction: -1, 0 or +1. *)
    let push = float_of_int (Space.Discrete.to_int action - 1) in
    let raw_velocity =
      !velocity +. (push *. force) -. (gravity *. cos (3.0 *. !position))
    in
    let bounded_velocity = clamp (-.max_speed) max_speed raw_velocity in
    let next_position =
      clamp min_position max_position (!position +. bounded_velocity)
    in
    (* Hitting the left wall while still moving left stops the car. *)
    let next_velocity =
      if next_position = min_position && bounded_velocity < 0.0 then 0.0
      else bounded_velocity
    in
    position := next_position;
    velocity := next_velocity;
    steps := !steps + 1;
    let terminated =
      next_position >= goal_position && next_velocity >= goal_velocity
    in
    (* Truncation is only reported when the goal was not reached. *)
    let truncated = (not terminated) && !steps >= max_steps in
    let info = Info.set "steps" (Info.int !steps) Info.empty in
    Env.step_result
      ~observation:(make_obs next_position next_velocity)
      ~reward:(-1.0) ~terminated ~truncated ~info ()
  in

  (* Draws a 41-character track with 'G' at the goal cell and 'C' at the car
     cell (the car is written last, so it hides the goal when they coincide),
     followed by the current position, velocity and step count. *)
  let render () =
    let span = max_position -. min_position in
    (* Maps a track position to a cell index on a 0..40 scale. *)
    let cell_of x = int_of_float ((x -. min_position) /. span *. 40.0) in
    let track = Bytes.make 41 '-' in
    Bytes.set track (cell_of goal_position) 'G';
    Bytes.set track (max 0 (min 40 (cell_of !position))) 'C';
    let line =
      Printf.sprintf "MountainCar: [%s] pos=%.3f, vel=%.3f, steps=%d"
        (Bytes.to_string track) !position !velocity !steps
    in
    Some line
  in

  Env.create ?render_mode ~render_modes:[ "ansi" ] ~id:"MountainCar-v0"
    ~observation_space ~action_space ~reset ~step ~render ()
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>