package fehu

  1. Overview
  2. Docs
Reinforcement learning framework for OCaml

Install

dune-project
 Dependency

Authors

Maintainers

Sources

raven-1.0.0.alpha1.tbz
sha256=8e277ed56615d388bc69c4333e43d1acd112b5f2d5d352e2453aef223ff59867
sha512=369eda6df6b84b08f92c8957954d107058fb8d3d8374082e074b56f3a139351b3ae6e3a99f2d4a4a2930dd950fd609593467e502368a13ad6217b571382da28c

doc/src/fehu.envs/random_walk.ml.html

Source file random_walk.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
open Fehu

(** Observation tensor with float32 elements. [reset] and [step] in this file
    always build it with shape [[| 1 |]], holding the walker's position. *)
type observation = (float, Rune.float32_elt) Rune.t

(** Action tensor with int32 elements. [step] reshapes it to a single element
    before reading it. *)
type action = (int32, Rune.int32_elt) Rune.t

(** Rendered frame: a text string produced by [render]. *)
type render = string

(** Mutable per-environment state.
    - [position]: current position of the walker; [step] keeps it clamped to
      [-10.0, 10.0].
    - [steps]: number of [step] calls since the last [reset]. *)
type state = { mutable position : float; mutable steps : int }

(** One-dimensional box observation space with bounds [-10.0, 10.0]; these
    match the clamping bounds applied to the position in [step]. *)
let observation_space = Space.Box.create ~low:[| -10.0 |] ~high:[| 10.0 |]

(** Discrete action space created with size 2. In [step], action [0] moves the
    walker by [-1.0] and any other value moves it by [+1.0]. *)
let action_space = Space.Discrete.create 2

(** Environment metadata: starts from [Metadata.default], then registers the
    ["ansi"] render mode, a description, an author and a version string. *)
let metadata =
  let description = Some "Simple one-dimensional random walk environment" in
  let version = Some "0.1.0" in
  let with_mode = Metadata.add_render_mode "ansi" Metadata.default in
  let with_text = Metadata.with_description description with_mode in
  let with_author = Metadata.add_author "Fehu New" with_text in
  Metadata.with_version version with_author

(** [reset env ?options () state] rewinds [state] to the start of an episode
    (position [0.0], step counter [0]) and returns the initial observation, a
    one-element float32 tensor containing [0.0], paired with [Info.empty].
    Both [env] and [options] are ignored. *)
let reset _env ?options:_ () state =
  state.steps <- 0;
  state.position <- 0.0;
  let initial_observation = Rune.create Rune.float32 [| 1 |] [| 0.0 |] in
  (initial_observation, Info.empty)

(** [step env action state] advances the walk by one move and returns the
    resulting transition built with [Env.transition].

    - The action tensor is reshaped to one element; [0] moves by [-1.0], any
      other value moves by [+1.0].
    - The new position is clamped to [-10.0, 10.0] and stored in [state];
      [state.steps] is incremented.
    - [terminated] is set once the absolute position reaches [10.0].
    - [truncated] is set once [200] steps have been taken.
    - The reward is the negated absolute position.
    - The info carries the step count under the key ["steps"].

    [env] is ignored. *)
let step _env action state =
  (* Read the single action element as a native int. *)
  let action_value =
    let flat : Int32.t array = Rune.to_array (Rune.reshape [| 1 |] action) in
    Int32.to_int flat.(0)
  in
  let delta = match action_value with 0 -> -1.0 | _ -> 1.0 in
  (* Clamp from below first, then from above. *)
  let position =
    state.position +. delta |> Float.max (-10.0) |> Float.min 10.0
  in
  let steps = state.steps + 1 in
  state.position <- position;
  state.steps <- steps;
  let distance = Float.abs position in
  let terminated = distance >= 10.0 in
  let truncated = steps >= 200 in
  let reward = Float.neg distance in
  let info = Info.empty |> Info.set "steps" (Info.int steps) in
  let observation = Rune.create Rune.float32 [| 1 |] [| position |] in
  Env.transition ~observation ~reward ~terminated ~truncated ~info ()

(** [render state] draws the walk as text: a line with the signed position
    (two decimals), then a 21-character track of ['.'] between ['|'] bars
    with an ['o'] at the walker's cell. The cell index is the position shifted
    by [+10], truncated to an int and clamped to [0, 20]. *)
let render state =
  let cell = (state.position +. 10.) |> int_of_float |> min 20 |> max 0 in
  let track = String.init 21 (fun i -> if i = cell then 'o' else '.') in
  Format.asprintf "Position: %+.2f@.|%s|" state.position track

(** [make ~rng ()] builds a fresh ["RandomWalk-v0"] environment via
    [Env.create]. Each call allocates its own mutable state (position [0.0],
    step count [0]) that is captured by the reset, step and render callbacks.
    [rng] is forwarded to [Env.create]; closing the environment does nothing. *)
let make ~rng () =
  let state = { position = 0.0; steps = 0 } in
  (* Callbacks close over [state] so the environment owns its own walk. *)
  let on_reset env ?options () = reset env ?options () state in
  let on_step env action = step env action state in
  let on_render _ = Some (render state) in
  let on_close _ = () in
  Env.create ~id:"RandomWalk-v0" ~metadata ~rng ~observation_space ~action_space
    ~reset:on_reset ~step:on_step ~render:on_render ~close:on_close ()