package fehu

  1. Overview
  2. Docs

Source file random_walk.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
(*---------------------------------------------------------------------------
  Copyright (c) 2026 The Raven authors. All rights reserved.
  SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

open Fehu

(** Observation type: an [Nx] tensor with [float32] elements. [make_obs]
    builds these with shape [[| 1 |]], holding the walker's position. *)
type obs = (float, Nx.float32_elt) Nx.t

(** Action type: an [Nx] tensor with [int32] elements. *)
type act = (int32, Nx.int32_elt) Nx.t

(** Render output type: a plain string (see [render_ansi]). *)
type render = string

(** Distance added to or subtracted from the position on every step. *)
let step_size = 1.0

(** Bound on the absolute position: positions are clamped to
    [-.max_position .. max_position], and an episode terminates once the
    absolute position reaches this value. *)
let max_position = 10.0

(** Step count at which an episode that has not terminated is truncated. *)
let max_steps = 200

(** Observation space: a one-element box ranging from [-.max_position] to
    [max_position]. *)
let observation_space =
  let high = [| max_position |] in
  let low = [| -.max_position |] in
  Space.Box.create ~low ~high

(** Action space: two discrete actions. The environment's step function
    treats action [0] as a move in the negative direction and any other
    action as a move in the positive direction. *)
let action_space =
  let action_count = 2 in
  Space.Discrete.create action_count
(** [make_obs position] wraps [position] in a [float32] tensor of shape
    [[| 1 |]]. *)
let make_obs position =
  let shape = [| 1 |] in
  let data = [| position |] in
  Nx.create Nx.float32 shape data

(** [render_ansi position] draws the walker on a one-line ASCII track.

    The track has one cell per unit of distance from [-.max_position] to
    [max_position] inclusive. The cell at offset
    [int_of_float (position +. max_position)] (clamped to the track) is
    marked with ['o']; every other cell is ['.'].

    @return a two-line string: a ["Position: "] header showing the signed
    position with two decimals, followed by the track framed by ['|']. *)
let render_ansi position =
  (* Derive the track geometry from [max_position] instead of hard-coding
     20/21, so the drawing stays consistent if the bound is ever changed. *)
  let last_cell = int_of_float (2.0 *. max_position) in
  let offset = int_of_float (position +. max_position) in
  (* Clamp so an out-of-range position can never index outside the buffer. *)
  let offset = max 0 (min last_cell offset) in
  let buffer = Bytes.make (last_cell + 1) '.' in
  Bytes.set buffer offset 'o';
  Printf.sprintf "Position: %+.2f\n|%s|" position (Bytes.to_string buffer)

(** [make ?render_mode ()] builds a ["RandomWalk-v0"] environment.

    The environment keeps two pieces of mutable state private to the returned
    value: the walker's current position and the number of steps taken.

    - Reset puts the walker back at [0.0], zeroes the step counter and returns
      the initial observation together with [Info.empty].
    - Step moves the walker by [step_size] (negative direction for action [0],
      positive otherwise), clamps the result to
      [-.max_position .. max_position], and reports:
      the reward, which is the negated absolute position;
      [terminated], set once the absolute position reaches [max_position];
      [truncated], set when [max_steps] is reached without termination;
      and an info record carrying the step count under ["steps"].
    - Render returns the [render_ansi] drawing of the current position.

    @param render_mode forwarded unchanged to [Env.create]. *)
let make ?render_mode () =
  let steps = ref 0 in
  let position = ref 0.0 in
  let reset _env ?options:_ () =
    steps := 0;
    position := 0.0;
    (make_obs !position, Info.empty)
  in
  let step _env action =
    let delta =
      match Space.Discrete.to_int action with
      | 0 -> -.step_size
      | _ -> step_size
    in
    (* Move, then clamp into the allowed interval. *)
    let next =
      !position +. delta
      |> Float.max (-.max_position)
      |> Float.min max_position
    in
    position := next;
    incr steps;
    let elapsed = !steps in
    let distance = Float.abs next in
    let terminated = distance >= max_position in
    (* Truncation is only reported when the episode did not terminate. *)
    let truncated = elapsed >= max_steps && not terminated in
    let reward = Float.neg distance in
    let info = Info.empty |> Info.set "steps" (Info.int elapsed) in
    let observation = make_obs next in
    Env.step_result ~observation ~reward ~terminated ~truncated ~info ()
  in
  let render () = !position |> render_ansi |> Option.some in
  let id = "RandomWalk-v0" in
  let render_modes = [ "ansi" ] in
  Env.create ?render_mode ~render_modes ~id ~observation_space ~action_space
    ~reset ~step ~render ()