Source file lrc.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
(** This module constructs a graph refining the LR automaton to reason about
reachable configurations---the pairs of an LR state and a lookahead token, the
transitions that allow to go from one to another, in order to determine which
ones are reachable from initial states. It includes functionality to compute
reachable states, wait states, entry points, predecessors, successors, and
prefixes for states in the LR automaton.
LRC means "LR with classes", where a class is the partition of lookahead
symbols with identical behaviors, as determined by the reachability
analysis. *)
open Utils
open Misc
open Fix.Indexing
open Info
type ('g, 'n) t = {
lr1_of: ('n, 'g lr1 index) vector;
lrcs_of: ('g lr1, 'n indexset) vector;
all_wait: 'n indexset;
all_leaf: 'n indexset;
all_successors: ('n, 'n indexset) vector;
reachable_from: ('n, 'n indexset) vector;
}
let count t = Vector.length t.lr1_of
let index_delta (type n) (i : n index) (j : n index) =
(i :> int) - (j :> int)
let class_index t lrc =
index_delta lrc (Option.get (IndexSet.minimum t.lrcs_of.:(t.lr1_of.:(lrc))))
module N = Unsafe_cardinal()
type 'g n = 'g N.t
let make (type g) (g : g grammar) ((module Reachability) : g Reachability.t) =
let n =
let count lr1 = Array.length (Reachability.Classes.for_lr1 lr1) in
let sum = ref 0 in
Index.iter (Lr1.cardinal g) (fun lr1 -> sum := !sum + count lr1);
!sum
in
let open N.Const(struct type t = g let cardinal = n end) in
let index_shift (i : n index) offset =
Index.of_int n ((i :> int) + offset)
in
let lr1_of = Vector.make' n (fun () -> Index.of_int (Lr1.cardinal g) 0) in
let lrcs_of =
let count = ref 0 in
let init_lr1 lr1 =
let classes = Reachability.Classes.for_lr1 lr1 in
assert (Array.length classes > 0);
let first = Index.of_int n !count in
count := !count + Array.length classes;
let all = ref IndexSet.empty in
for i = Array.length classes - 1 downto 0 do
let lrc = index_shift first i in
all := IndexSet.add lrc !all;
Vector.set lr1_of lrc lr1
done;
!all
in
Vector.init (Lr1.cardinal g) init_lr1
in
let first_lrc_of =
Vector.map (fun x -> Option.get (IndexSet.minimum x)) lrcs_of
in
let all_wait = IndexSet.map (Vector.get first_lrc_of) (Lr1.wait g) in
stopwatch 2 "Allocated LRC set (%d elements)" (cardinal n);
let predecessors = Vector.make n IndexSet.empty in
let process lr1 =
let tgt_first = first_lrc_of.:(lr1) in
match Option.map (Symbol.desc g) (Lr1.incoming g lr1) with
| None -> ()
| Some (T t) ->
predecessors.:(tgt_first) <-
IndexSet.map (fun tr ->
let src = Transition.source g tr in
let classes = Reachability.Classes.for_lr1 src in
let class_index _ classe = IndexSet.mem t classe in
index_shift first_lrc_of.:(src)
(Misc.array_findi class_index 0 classes)
) (Transition.predecessors g lr1)
| Some _ ->
let process_transition tr =
let node = Reachability.Tree.leaf tr in
let encode = Reachability.Cell.encode node in
let pre_classes = Reachability.Classes.pre_transition tr in
let post_classes = Reachability.Classes.post_transition tr in
let coercion =
Reachability.Coercion.infix post_classes
(Reachability.Classes.for_lr1 lr1)
in
let src_first = first_lrc_of.:(Transition.source g tr) in
let pre_classes = Array.length pre_classes in
let post_classes = Array.length post_classes in
for post = 0 to post_classes - 1 do
let reachable = ref IndexSet.empty in
for pre = 0 to pre_classes - 1 do
if Reachability.Analysis.cost (encode ~pre ~post) < max_int then
reachable := IndexSet.add (index_shift src_first pre) !reachable
done;
let reachable = !reachable in
Array.iter (fun index ->
predecessors.@(index_shift tgt_first index) <- IndexSet.union reachable)
coercion.forward.(post)
done
in
IndexSet.rev_iter process_transition (Transition.predecessors g lr1)
in
Index.rev_iter (Lr1.cardinal g) process;
stopwatch 2 "Computed LRC transitions/predecessors";
let all_successors = Misc.relation_reverse n predecessors in
stopwatch 2 "Computed LRC successors";
let reachable_from = Vector.copy all_successors in
Tarjan.close_relation (fun f i -> IndexSet.iter f all_successors.:(i)) reachable_from;
stopwatch 2 "Closed LRC successors";
let all_leaf = IndexSet.all n in
{lr1_of; lrcs_of; all_wait; all_leaf; all_successors; reachable_from}
let to_string g t lrc =
Printf.sprintf "%s/%d"
(Lr1.to_string g t.lr1_of.:(lrc))
(class_index t lrc)
let set_to_string info t lrcs =
string_of_indexset ~index:(to_string info t) lrcs
type 'n entrypoints = {
reachable: 'n indexset;
wait: 'n indexset;
entrypoints: 'n indexset;
successors: ('n, 'n indexset) vector;
predecessors: ('n, 'n indexset) vector;
some_prefix: 'n index -> 'n index list;
}
let from_entrypoints (type g n) (g: g grammar) lrc_graph entrypoints : n entrypoints =
let reachable = ref IndexSet.empty in
let successors =
let table = Vector.make (count lrc_graph) IndexSet.empty in
let todo = ref entrypoints in
let populate i =
reachable := IndexSet.add i !reachable;
if IndexSet.is_empty table.:(i) then (
let succ = lrc_graph.all_successors.:(i) in
todo := IndexSet.union succ !todo;
Vector.set table i succ;
)
in
while IndexSet.is_not_empty !todo do
let todo' = !todo in
todo := IndexSet.empty;
IndexSet.rev_iter populate todo';
done;
table
in
let predecessors = Misc.relation_reverse (count lrc_graph) successors in
let reachable = !reachable in
let wait = IndexSet.inter lrc_graph.all_wait reachable in
let some_prefix =
let table = lazy (
let table = Vector.make (count lrc_graph) [] in
let todo = ref [] in
let expand prefix state =
match Vector.get table state with
| [] ->
Vector.set table state prefix;
let prefix = state :: prefix in
let succ = successors.:(state) in
if IndexSet.is_not_empty succ then
push todo (succ, prefix)
| _ -> ()
in
Vector.iteri (fun lr1 lrcs ->
if Option.is_none (Lr1.incoming g lr1) then
expand [] (Option.get (IndexSet.minimum lrcs)))
lrc_graph.lrcs_of;
let propagate (succ, prefix) =
IndexSet.iter (expand prefix) succ
in
fixpoint ~propagate todo;
table
)
in
(fun st -> Vector.get (Lazy.force table) st)
in
{reachable; wait; entrypoints; successors; predecessors; some_prefix}
let check_deterministic (type g) (g : g grammar) ((module Reachability) : g Reachability.t) =
let prefix = Vector.make (Lr1.cardinal g) [] in
let rec loop next = function
| [] -> if not (list_is_empty next) then loop [] next
| xs :: xxs ->
let x = List.hd xs in
let next =
if list_is_empty prefix.:(x)
then (
prefix.:(x) <- xs;
IndexSet.fold
(fun tr next -> (Transition.target g tr :: xs) :: next)
(Transition.successors g x) next
) else next
in
loop next xxs
in
loop [] (List.map (fun x -> [x]) (IndexSet.elements (Lr1.entrypoints g)));
let todo = ref [] in
let table = Hashtbl.create 7 in
let visit path lr1 classes =
let key = (lr1, classes) in
let path = key :: path in
let print_class cl =
if cl == Terminal.all g then
"T"
else
string_concat_map ~wrap:("{","}") ","
(Terminal.to_string g)
(List.rev (IndexSet.elements cl))
in
let print (lr1, classes) =
let all_classes = Reachability.Classes.for_lr1 lr1 in
Printf.sprintf "%s@{%s}"
(Lr1.to_string g lr1)
(string_concat_map ", "
(fun i -> print_class all_classes.(i))
(IntSet.elements classes))
in
if IntSet.is_empty classes then
Printf.eprintf "Found dead-end: %s\n prefix: %s\n starting classes: %s\n"
(string_concat_map " -> " print path)
(Lr1.list_to_string g (List.rev (List.tl prefix.:(lr1))))
(let top, _ = List.hd (List.rev path) in
string_concat_map ~wrap:("{","}") ","
print_class
(Array.to_list (Reachability.Classes.for_lr1 top))
)
else
if not (Hashtbl.mem table key) then (
Hashtbl.add table key ();
push todo path
)
in
let propagate path =
let (target, post_class_indices) = List.hd path in
IndexSet.iter begin fun tr ->
let source = Transition.source g tr in
let node = Reachability.Tree.leaf tr in
let encode = Reachability.Cell.encode node in
let pre_classes = Reachability.Classes.for_lr1 source in
let pre_class_indices =
match Transition.split g tr with
| R sh ->
let term = Transition.shift_symbol g sh in
let pre =
Utils.Misc.array_findi
(fun _ cb -> IndexSet.mem term cb)
0 pre_classes
in
if Reachability.Analysis.cost (encode ~pre ~post:0) < max_int then
IntSet.singleton pre
else
IntSet.empty
| L gt ->
let post_classes = Reachability.Classes.for_lr1 target in
let mid_classes = Reachability.Classes.for_edge gt in
IntSet.bind post_class_indices begin fun post_index ->
let ca = post_classes.(post_index) in
match
Utils.Misc.array_findi
(fun _ cb -> IndexSet.quick_subset ca cb) 0 mid_classes
with
| exception Not_found -> IntSet.empty
| mid_index ->
IntSet.init_subset 0 (Array.length pre_classes - 1)
(fun pre -> Reachability.Analysis.cost (encode ~pre ~post:mid_index) < max_int)
end
in
visit path source pre_class_indices
end (Transition.predecessors g target)
in
Index.iter (Lr1.cardinal g) (fun lr1 ->
let len = Array.length (Reachability.Classes.for_lr1 lr1) in
if len > 0 then
visit [] lr1 (IntSet.init_interval 0 (len - 1))
);
fixpoint ~propagate todo;
stopwatch 2 "Determinized Lrc all: %d states" (Hashtbl.length table)
module Mlrc = Unsafe_cardinal()
type 'g mlrc = 'g Mlrc.t
let make_minimal (type g) (g : g grammar) ((module Reachability) : g Reachability.t) : (g, g mlrc) t =
let open IndexBuffer in
let module State = Gen.Make() in
let module Transitions = Gen.Make() in
let states = State.get_generator () in
let transitions = Transitions.get_generator () in
let table = Hashtbl.create 7 in
let todo = ref [] in
let visit lr1 classes =
assert (not (IntSet.is_empty classes));
let key = (lr1, classes) in
match Hashtbl.find_opt table key with
| Some index -> index
| None ->
let index = Gen.add states key in
Hashtbl.add table key index;
push todo index;
index
in
let propagate index =
let (target, post_class_indices) = Gen.get states index in
IndexSet.iter begin fun tr ->
let source = Transition.source g tr in
let node = Reachability.Tree.leaf tr in
let encode = Reachability.Cell.encode node in
let pre_classes = Reachability.Classes.for_lr1 source in
let pre_class_indices =
match Transition.split g tr with
| R sh ->
let term = Transition.shift_symbol g sh in
let pre =
Utils.Misc.array_findi
(fun _ cb -> IndexSet.mem term cb)
0 pre_classes
in
if Reachability.Analysis.cost (encode ~pre ~post:0) < max_int then
IntSet.singleton pre
else
IntSet.empty
| L gt ->
let post_classes = Reachability.Classes.for_lr1 target in
let mid_classes = Reachability.Classes.for_edge gt in
IntSet.bind post_class_indices begin fun post_index ->
let ca = post_classes.(post_index) in
match
Utils.Misc.array_findi
(fun _ cb -> IndexSet.quick_subset ca cb) 0 mid_classes
with
| exception Not_found -> IntSet.empty
| mid_index ->
IntSet.init_subset 0 (Array.length pre_classes - 1)
(fun pre -> Reachability.Analysis.cost (encode ~pre ~post:mid_index) < max_int)
end
in
if not (IntSet.is_empty pre_class_indices) then
let index' = visit source pre_class_indices in
ignore (Gen.add transitions (index, index'))
end (Transition.predecessors g target)
in
let fast_map s f =
let l = IndexSet.fold (fun x acc -> match f x with None -> acc | Some y -> y :: acc) s [] in
IndexSet.of_list l
in
let visit_state lr1 =
let len = Array.length (Reachability.Classes.for_lr1 lr1) in
if len > 0 then
let set = IntSet.init_interval 0 (len - 1) in
Some (visit lr1 set)
else
None
in
let all_wait = fast_map (Lr1.wait g) visit_state in
fixpoint ~propagate todo;
stopwatch 2 "Determinized Lrc wait: %d states" (Hashtbl.length table);
let all_leaf = fast_map (Lr1.all g) visit_state in
fixpoint ~propagate todo;
stopwatch 2 "Determinized Lrc all: %d states" (Hashtbl.length table);
let module Min =
Valmari.Minimize(struct
type t = g lr1 index
let compare = Index.compare
end)(struct
let source tr = fst (Gen.get transitions tr)
let target tr = snd (Gen.get transitions tr)
let label tr = fst (Gen.get states (source tr))
let initials f = IndexSet.iter f all_leaf
let finals f =
IndexSet.iter (fun lr1 ->
let state = (Hashtbl.find table (lr1, IntSet.singleton 0)) in
f state
) (Lr1.entrypoints g)
let refinements f =
IndexSet.iter (fun lr1 ->
match Hashtbl.find_opt table (lr1, IntSet.singleton 0) with
| None -> ()
| Some index -> f (fun ~add -> add index)
) (Lr1.entrypoints g)
type states = State.n
let states = State.n
type transitions = Transitions.n
let transitions = Transitions.n
end)
in
stopwatch 2 "Minimized deterministic Lrc: %d states" (cardinal Min.states);
let lr1_of =
Vector.init Min.states
(fun st -> fst (Gen.get states (Min.represent_state st)))
in
let lrcs_of = Vector.make (Lr1.cardinal g) IndexSet.empty in
Vector.rev_iteri (fun lrc lr1 -> lrcs_of.@(lr1) <- IndexSet.add lrc)
lr1_of;
Index.iter State.n (fun st ->
match Min.transport_state st with
| None -> ()
| Some mst ->
let st' = Min.represent_state mst in
assert (st = st' || fst (Gen.get states st) = lr1_of.:(mst))
);
let all_wait = IndexSet.filter_map Min.transport_state all_wait in
let all_leaf = IndexSet.filter_map Min.transport_state all_leaf in
let all_successors = Vector.make Min.states IndexSet.empty in
Index.rev_iter Min.transitions
(fun tr -> all_successors.@(Min.target tr) <- IndexSet.add (Min.source tr));
let reachable_from = Vector.copy all_successors in
Tarjan.close_relation
(fun f i -> IndexSet.iter f all_successors.:(i))
reachable_from;
stopwatch 2 "Minimal Lrc ready";
let open Mlrc.Eq(struct type t = g type n = Min.states let n = Min.states end) in
let Refl = eq in
{lr1_of; lrcs_of; all_wait; all_leaf; all_successors; reachable_from}
let transitions_of_states t ss =
IndexSet.fold
(fun s acc -> IndexMap.update t.lr1_of.:(s) (add_update s) acc)
ss IndexMap.empty
let some_prefix g t =
let prefix = Vector.make (count t) [] in
let rec loop next = function
| [] -> if not (list_is_empty next) then loop [] next
| xs :: xxs ->
let x = List.hd xs in
let next =
if list_is_empty prefix.:(x)
then (
prefix.:(x) <- xs;
IndexSet.fold (fun x' next -> (x' :: xs) :: next)
t.all_successors.:(x) next
) else next
in
loop next xxs
in
loop [] (IndexSet.bind (Lr1.entrypoints g) (Vector.get t.lrcs_of)
|> IndexSet.to_seq |> Seq.map (fun x -> [x])
|> List.of_seq);
prefix
let predecessors t =
relation_reverse (Vector.length t.all_successors) t.all_successors
let check_prefix g t1 p1 t2 p2 =
let l1 = Vector.make (Lr1.cardinal g) [] in
let l2 = Vector.make (Lr1.cardinal g) [] in
Vector.iteri (fun x p ->
let lr1 = t1.lr1_of.:(x) in
if list_is_empty l1.:(lr1) then l1.:(lr1) <- p
) p1;
Vector.iteri (fun x p ->
let lr1 = t2.lr1_of.:(x) in
if list_is_empty l2.:(lr1) then l2.:(lr1) <- p
) p2;
Index.iter (Lr1.cardinal g) (fun lr1 ->
let e1 = list_is_empty l1.:(lr1) in
let e2 = list_is_empty l2.:(lr1) in
if e1 <> e2 then
Printf.eprintf "state %s is unreachable only on %s side\n"
(Lr1.to_string g lr1) (if e1 then "left" else "right")
)
let check_equivalence g t1 t2 s1 s2 =
let table = Hashtbl.create 7 in
let successes = ref 0 in
let failures = ref 0 in
let todo = ref [] in
let schedule path s1 s2 =
let key = (s1, s2) in
if not (Hashtbl.mem table key) then (
Hashtbl.add table key ();
push todo (path, s1, s2)
)
in
let prefix1 = some_prefix g t1 in
let prefix2 = some_prefix g t2 in
check_prefix g t1 prefix1 t2 prefix2;
let pred1 = predecessors t1 in
let pred2 = predecessors t2 in
schedule [] s1 s2;
let propagate (path, s1, s2) =
let m1 = transitions_of_states t1 s1 in
let m2 = transitions_of_states t2 s2 in
let merge lr1 s1' s2' =
begin match s1', s2' with
| Some s1', Some s2' ->
incr successes;
schedule (lr1 :: path)
(IndexSet.bind s1' (Vector.get pred1))
(IndexSet.bind s2' (Vector.get pred2))
| _ ->
incr failures;
let p l =
Printf.eprintf "Suffix %s is reachable only on %s side (%s)\n"
(Lr1.list_to_string g (lr1 :: path))
(if Option.is_none s1' then "right" else "left")
(Lr1.list_to_string g l)
in
let l =
match s1', s2' with
| Some s1', _ -> List.map (Vector.get t1.lr1_of) prefix1.:(IndexSet.choose s1')
| _, Some s2' -> List.map (Vector.get t2.lr1_of) prefix2.:(IndexSet.choose s2')
| _ -> assert false
in
if l <> [] then
p l
end;
None
in
ignore (IndexMap.merge merge m1 m2)
in
fixpoint ~propagate todo;
Printf.eprintf "Tested %d successful paths, %d failing paths\n"
!successes !failures