package hardcaml_xilinx

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file true_dual_port_ram.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
open Base
open Hardcaml
open Signal
module Tdpram = Xpm_2019_1.Xpm_memory_tdpram

(* Block RAM - address collision behaviour.  UG573, table 1-3, common clocks.

   For a given mode on port a and b, and read/write enables on each port, what is the
   resulting value on the data out ports, and stored in memory?

   {v
   port a     port b   wea       web        doa   dob   mem
  RF/WF/NC | RF/WF/NC | 0       | 0       | OLD | OLD | NC
  RF       | RF/WF/NC | 1 (DIA) | 0       | OLD | OLD | DIA
  WF       | RF/WF/NC | 1 (DIA) | 0       | DIA | X   | DIA
  NC       | RF/WF/NC | 1 (DIA) | 0       | NC  | X   | DIA
  RF/WF/NC | RF       | 0       | 1 (DIB) | OLD | OLD | DIB
  RF/WF/NC | WF       | 0       | 1 (DIB) | X   | DIB | DIB
  RF/WF/NC | NC       | 0       | 1 (DIB) | X   | NC  | DIB
  RF/WF/NC | RF/WF/NC | 1       | 1       | X   | X   | X
v}

   RF     = Read first
   WF     = Write first
   NC     = No change
   OLD    = Old values stored in memory
   DIA/B  = Data in A or B
   X      = Invalid
   we[ab] = write when [1], read when [0]
   In all cases the addresses on ports a and b have the same value.
*)

(* Ultra RAM

   These work differently. They have 2 ports, [a] and [b]. The RAM is "double pumped" - that
   is it works at twice the nominal clock rate and performs the [a] operation followed by
   the [b] operation.

   On a [write] operation, the output data on the same port is unchanged. Somewhat similar
   to [no_change] mode.

   Across ports, the behavior depends on the ordering of ports, i.e. a write on [a] will
   be reflected by a read on [b], but not the other way round.
*)

(* [any t] ORs together every bit of [t], combining them with [tree] at arity 2. The
   result is high when at least one bit of [t] is high. *)
let any t = Signal.bits_msb t |> tree ~arity:2 ~f:(reduce ~f:( |: ))

(* Collision mode implied by a RAM architecture. Block RAM carries its own mode,
   distributed RAM is treated as [Read_before_write] and UltraRAM as [No_change]. *)
let collision_mode (arch : Ram_arch.t) : Collision_mode.t =
  match arch with
  | Blockram configured_mode -> configured_mode
  | Ultraram -> No_change
  | Distributed -> Read_before_write
;;

(* Depth and data width of the RAM as seen from each port. *)
module Size_calculations = struct
  type t =
    { size_a : int
    ; size_b : int
    ; width_a : int
    ; width_b : int
    }
  [@@deriving sexp_of]

  (* [create ~size ~port_a ~port_b] takes [size] as the number of port A words and derives
     the number of port B words from the two data widths. Raises if the total number of
     bits is not a whole number of port B words. *)
  let create ~size ~(port_a : _ Ram_port.t) ~(port_b : _ Ram_port.t) =
    let width_a = width port_a.data in
    let width_b = width port_b.data in
    let size_a = size in
    (* Total storage in bits, expressed in port A terms. *)
    let total_bits = size_a * width_a in
    if total_bits % width_b = 0
    then { size_a; size_b = total_bits / width_b; width_a; width_b }
    else
      raise_s
        [%message
          "[size] is the number of port A words in the RAM. It must be chosen so that \
           there is an integer number of port B words in the RAM as well."
            (size_a : int)
            (width_a : int)
            (width_b : int)]
  ;;
end

(* Synthesis implementation: instantiate the Xilinx XPM true dual port memory
   ([Xpm_memory_tdpram], aliased as [Tdpram] in this file).

   [size] is the depth in port A words; the port B depth and both address widths are
   derived from it through [Size_calculations]. [byte_write_width] selects 8 bit, 9 bit
   or full word write enables. [cascade_height] and [memory_optimization] are options;
   when they are [None] the values inherited from [Tdpram.P] are used.

   Asserts that [read_latency > 0] and that the address and write enable widths of both
   ports agree with the values computed from the configuration.

   Returns the pair [(douta, doutb)] of the instantiated memory. *)
let create_xpm
      ~read_latency
      ~arch
      ~clock_a
      ~clock_b
      ~clear_a
      ~clear_b
      ~size
      ~byte_write_width
      ~(port_a : _ Ram_port.t)
      ~(port_b : _ Ram_port.t)
      ~cascade_height:arg_cascade_height
      ~memory_optimization:arg_memory_optimization
  =
  (* Number of data bits covered by one write enable bit of [port]; [Full] means the whole
     data word. This shadows the [byte_write_width] argument. *)
  let byte_write_width (port : _ Ram_port.t) =
    match byte_write_width with
    | Byte_write_width.B8 -> 8
    | B9 -> 9
    | Full -> width port.data
  in
  let { Size_calculations.size_a; size_b; width_a; width_b } =
    Size_calculations.create ~size ~port_a ~port_b
  in
  let addr_bits_a = Bits.address_bits_for size_a in
  let addr_bits_b = Bits.address_bits_for size_b in
  (* XPM parameters: start from [Tdpram.P] and override the ones that depend on the
     arguments of this function. *)
  let module Params = struct
    include Tdpram.P

    (* Keep the inherited value unless the caller supplied one. *)
    let memory_optimization =
      match arg_memory_optimization with
      | None -> memory_optimization
      | Some false -> "false"
      | Some true -> "true"
    ;;

    (* Keep the inherited value unless the caller supplied one. *)
    let cascade_height =
      match arg_cascade_height with
      | None -> cascade_height
      | Some arg_cascade_height -> Cascade_height.to_xpm_args arg_cascade_height
    ;;

    let write_data_width_a = width_a
    let write_data_width_b = width_b
    let byte_write_width_a = byte_write_width port_a
    let byte_write_width_b = byte_write_width port_b
    let read_data_width_a = width_a
    let read_data_width_b = width_b
    let addr_width_a = addr_bits_a
    let addr_width_b = addr_bits_b

    (* Total number of bits, computed from the port A view of the memory. *)
    let memory_size = width_a * size_a
    let memory_primitive = Ram_arch.to_xpm_parameter arch

    (* Both ports share the same read latency. *)
    let read_latency_a = read_latency
    let read_latency_b = read_latency

    (* NOTE(review): presumably turns off memory initialisation - confirm against the XPM
       documentation. *)
    let use_mem_init = 0

    (* Both ports use the write mode implied by [arch]. *)
    let write_mode_a = Collision_mode.to_xpm_parameter (collision_mode arch)
    let write_mode_b = write_mode_a
  end
  in
  (* Number of write enable bits expected on [port]: one per [byte_write_width] sized
     slice of the data, which must divide the data width exactly. *)
  let write_enable_width (port : _ Ram_port.t) =
    let byte_write_width = byte_write_width port in
    assert (width port.data % byte_write_width = 0);
    width port.data / byte_write_width
  in
  (* Check the supplied ports against the computed configuration. *)
  assert (read_latency > 0);
  assert (Params.addr_width_a = width port_a.address);
  assert (Params.addr_width_b = width port_b.address);
  assert (write_enable_width port_a = width port_a.write_enable);
  assert (write_enable_width port_b = width port_b.write_enable);
  let module RAM = Tdpram.Make (Params) in
  (* A port is enabled when it reads or when any of its write enable bits is set. *)
  let ena = any port_a.write_enable |: port_a.read_enable in
  let enb = any port_b.write_enable |: port_b.read_enable in
  (* Value driven onto the [regce] inputs: constant high for a read latency of 1,
     otherwise [en] passed through a pipeline of [read_latency - 1] registers. *)
  let regce clock en =
    let spec = Reg_spec.create () ~clock in
    match read_latency with
    | 1 -> vdd
    | n -> pipeline spec ~enable:vdd ~n:(n - 1) en
  in
  (* Error injection and sleep inputs are tied low. *)
  let ram : _ RAM.O.t =
    RAM.create
      { RAM.I.clka (* Port A *) = clock_a
      ; rsta = clear_a
      ; regcea = regce clock_a port_a.read_enable
      ; ena
      ; wea = port_a.write_enable
      ; dina = port_a.data
      ; addra = port_a.address
      ; injectsbiterra = gnd
      ; injectdbiterra = gnd (* Port B *)
      ; clkb = clock_b
      ; rstb = clear_b
      ; regceb = regce clock_b port_b.read_enable
      ; enb
      ; web = port_b.write_enable
      ; dinb = port_b.data
      ; addrb = port_b.address
      ; injectsbiterrb = gnd
      ; injectdbiterrb = gnd
      ; sleep = gnd
      }
  in
  ram.douta, ram.doutb
;;

(* Delay [d] by [latency] register stages clocked by [clock].

   [enable] is registered once per stage as it travels down the pipeline. Only the final
   stage is gated by that delayed enable and is the only one that uses [clear]; the
   earlier stages are always enabled and have no clear. A [latency] of 0 returns [d]
   unchanged. *)
let rec output_pipe ~clock ~clear ~latency ~enable d =
  let free_running = Reg_spec.create () ~clock in
  let with_clear = Reg_spec.create () ~clock ~clear in
  if latency = 0
  then d
  else if latency = 1
  then reg with_clear ~enable:(reg free_running ~enable:vdd enable) d
  else
    output_pipe
      ~clock
      ~clear
      ~latency:(latency - 1)
      ~enable:(reg free_running ~enable:vdd enable)
      (reg free_running ~enable:vdd d)
;;

(* This is very similar to rams built with [Ram.create]. The main difference is when
   modelling ultrarams. To get the correct behaviour for a write on one port and read on
   the other port, we must put port [a] into [Read_before_write] mode, and port [b] into
   [Write_before_read] mode.

   The result is the array produced by mapping the per-port output functions over the
   read data of [Signal.multiport_memory]; index 0 is port [a] and index 1 is port [b]. *)
let create_base_rtl_ram
      ~simulation_name
      ~(arch : Ram_arch.t)
      ~clock_a
      ~clock_b
      ~size
      ~(port_a : _ Ram_port.t)
      ~(port_b : _ Ram_port.t)
  =
  (* Shadows [reg]: a register on [clock] with the given [enable] and no clear. *)
  let reg clock enable = reg (Reg_spec.create ~clock ()) ~enable in
  (* Enable of the read register of a port. In [No_change] mode a write blocks the update
     of the read register; in the other two modes a write updates it as well. *)
  let read_enable (port : _ Ram_port.t) =
    match collision_mode arch with
    | No_change -> port.read_enable &: ~:(port.write_enable)
    | Read_before_write | Write_before_read -> port.read_enable |: port.write_enable
  in
  let reg_a = reg clock_a (read_enable port_a) in
  let reg_b = reg clock_b (read_enable port_b) in
  (* Where the read register of each port sits: [f_read_address] is applied to the read
     address and [f_q] to the memory output. For every port exactly one of the two is a
     register and the other is the identity. Index 0 is port [a], index 1 is port [b]. *)
  let f_read_address, f_q =
    match arch with
    | Ultraram -> [| Fn.id; reg_b |], [| reg_a; Fn.id |]
    | Distributed | Blockram (Read_before_write | No_change) ->
      [| Fn.id; Fn.id |], [| reg_a; reg_b |]
    | Blockram Write_before_read -> [| reg_a; reg_b |], [| Fn.id; Fn.id |]
  in
  (* One write port and one read address per RAM port. *)
  let q =
    Signal.multiport_memory
      size
      ?name:simulation_name
      ~write_ports:
        [| { write_clock = clock_a
           ; write_enable = port_a.write_enable
           ; write_address = port_a.address
           ; write_data = port_a.data
           }
         ; { write_clock = clock_b
           ; write_enable =
               (match arch with
                | Distributed -> gnd
                (* Distributed RAM will not write on port B. *)
                | Blockram _ | Ultraram -> port_b.write_enable)
           ; write_address = port_b.address
           ; write_data = port_b.data
           }
        |]
      ~read_addresses:
        (Array.map2_exn f_read_address [| port_a.address; port_b.address |] ~f:(fun f a ->
           f a))
  in
  (* Apply the per-port output function (register or identity) to the read data. *)
  Array.map2_exn f_q q ~f:(fun f q -> f q)
;;

(* Simulation model of one RAM with single bit write enables: the base RTL RAM followed,
   on each port, by [read_latency - 1] extra output register stages driven by that port's
   clock, clear and read enable. Asserts that [read_latency > 0]. Returns the pair of
   read data signals for port a and port b. *)
let create_rtl'
      ~simulation_name
      ~read_latency
      ~arch
      ~clock_a
      ~clock_b
      ~clear_a
      ~clear_b
      ~size
      ~(port_a : _ Ram_port.t)
      ~(port_b : _ Ram_port.t)
  =
  assert (read_latency > 0);
  let raw_q =
    create_base_rtl_ram ~simulation_name ~arch ~clock_a ~clock_b ~size ~port_a ~port_b
  in
  (* The base RAM already accounts for one cycle of latency. *)
  let extra_stages = read_latency - 1 in
  let delay ~clock ~clear ~enable q =
    output_pipe ~clock ~clear ~latency:extra_stages ~enable q
  in
  ( delay ~clock:clock_a ~clear:clear_a ~enable:port_a.read_enable raw_q.(0)
  , delay ~clock:clock_b ~clear:clear_b ~enable:port_b.read_enable raw_q.(1) )
;;

(* Instantiate the core rtl ram multiple times so that it can support byte enables.

   The same mechanism is used to model ports of different data widths: the RAM is built
   from several instances that are each as wide as the narrower port.

   Raises if the port widths differ and the wider one is not a power of 2 multiple of the
   narrower one, or if byte enables are requested together with differing port widths.

   Returns the pair of read data signals for port a and port b. *)
let create_rtl
      ~simulation_name
      ~read_latency
      ~arch
      ~clock_a
      ~clock_b
      ~clear_a
      ~clear_b
      ~size
      ~(byte_write_width : Byte_write_width.t)
      ~(port_a : _ Ram_port.t)
      ~(port_b : _ Ram_port.t)
  =
  let { Size_calculations.size_a; size_b; width_a; width_b } =
    Size_calculations.create ~size ~port_a ~port_b
  in
  (* For each port: the list of per-instance ports, plus an optional
     [(read_select, read_enable)] pair that is present only for a port which is narrower
     than the other one and therefore has to pick one instance output on reads. *)
  let (port_a_split, read_select_a), (port_b_split, read_select_b) =
    if width_a <> width_b
    then (
      (* Ports of different widths. *)
      let min_width = Int.min width_a width_b in
      let max_width = Int.max width_a width_b in
      if max_width % min_width <> 0
      then
        raise_s
          [%message
            "max port data width must be an exact integer multiple of min port data \
             width. (update the simulation model if something else is required)"
              (max_width : int)
              (min_width : int)];
      let scale = max_width / min_width in
      if not (Int.is_pow2 scale)
      then
        raise_s
          [%message
            "ratio between port widths must be a power of 2. (update the simulation \
             model if non-power-of-2 scale is required)"
              (scale : int)];
      (match byte_write_width with
       | B8 | B9 ->
         raise_s
           [%message
             "byte enables not supported when port resizing is used (update the \
              simulation model if this is required)"]
       | Full -> ());
      (* Number of low address bits of the narrow port that select an instance. *)
      let log_scale = Int.ceil_log2 scale in
      let map_port (port : _ Ram_port.t) =
        if width port.data = max_width
        then (
          (* Wide port: cut the data into [min_width] slices, least significant first, one
             per instance. Address and enables are shared by all instances. *)
          let ports =
            split_lsb ~part_width:min_width port.data
            |> List.map ~f:(fun data -> { port with data })
          in
          ports, None)
        else (
          assert (width port.data = min_width);
          (* Narrow port: the low [log_scale] address bits choose the instance and the
             remaining bits address a word inside it. The enables are gated so that only
             the chosen instance is accessed. *)
          let read_select = sel_bottom port.address log_scale in
          let address = drop_bottom port.address log_scale in
          let ports =
            List.init (1 lsl log_scale) ~f:(fun word ->
              let enable = sel_bottom port.address log_scale ==:. word in
              { port with
                address
              ; write_enable = port.write_enable &: enable
              ; read_enable = port.read_enable &: enable
              })
          in
          ports, Some (read_select, port.read_enable))
      in
      map_port port_a, map_port port_b)
    else (
      (* Ports of equal width: one instance per byte lane, or a single instance when
         [byte_write_width] is [Full]. *)
      let split_port (port : _ Ram_port.t) =
        (* Pair each [byte_width] slice of the data with its write enable bit, both taken
           least significant first. *)
        let split_port byte_width =
          let data = split_lsb ~part_width:byte_width port.data in
          let write_enable = bits_lsb port.write_enable in
          List.map2_exn data write_enable ~f:(fun data write_enable ->
            { port with data; write_enable })
        in
        match byte_write_width with
        | Full -> [ port ]
        | B8 -> split_port 8
        | B9 -> split_port 9
      in
      (split_port port_a, None), (split_port port_b, None))
  in
  (* The width of the resulting RAM is the wider of the two port widths. So, the depth is
     given by the corresponding depth, which will be the smaller of the two sizes. *)
  let min_size = Int.min size_a size_b in
  (* One core RAM per pair of split ports. *)
  let qs =
    List.map2_exn port_a_split port_b_split ~f:(fun port_a port_b ->
      create_rtl'
        ~simulation_name
        ~read_latency
        ~arch
        ~clock_a
        ~clock_b
        ~clear_a
        ~clear_b
        ~size:min_size
        ~port_a
        ~port_b)
  in
  let qa, qb = List.unzip qs in
  (* Combine the instance outputs of one port. With a read select, the select is
     registered under the read enable, passed through [read_latency - 1] further
     registers and then used to mux between the outputs. Without one, the outputs are
     concatenated least significant first. *)
  let apply_read_select spec q rs =
    Option.map rs ~f:(fun (rs, re) ->
      mux (pipeline spec ~n:(read_latency - 1) (reg spec ~enable:re rs)) q)
    |> Option.value ~default:(concat_lsb q)
  in
  let spec_a = Reg_spec.create ~clock:clock_a ~clear:clear_a () in
  let spec_b = Reg_spec.create ~clock:clock_b ~clear:clear_b () in
  apply_read_select spec_a qa read_select_a, apply_read_select spec_b qb read_select_b
;;

(* Entry point. Picks the implementation from [build_mode]: the RTL model for
   [Simulation], the XPM instantiation for [Synthesis].

   Defaults: [read_latency] is 1, [arch] is [Blockram No_change] and [byte_write_width]
   is [Full]. [memory_optimization] and [cascade_height] are only passed to the synthesis
   implementation; [simulation_name] is only passed to the simulation model.

   The result is the chosen implementation partially applied; it still expects the
   clocks, clears, [size] and the two ports. *)
let create
      ?(read_latency = 1)
      ?(arch = Ram_arch.Blockram No_change)
      ?(byte_write_width = Byte_write_width.Full)
      ?memory_optimization
      ?cascade_height
      ?simulation_name
      ~(build_mode : Build_mode.t)
      ()
  =
  match build_mode with
  | Synthesis ->
    create_xpm ~memory_optimization ~cascade_height ~byte_write_width ~arch ~read_latency
  | Simulation -> create_rtl ~byte_write_width ~arch ~read_latency ~simulation_name
;;