package menhirGLR

You can search for identifiers within the package.

in-package search v0.2.0

menhirGLR
- Library menhirGLR
  - MenhirGLR
    
    GLR
    
    Make
    
    GLRAPI
    
    DATA
    
    Semv
    
    Production
    
    State
    
    GSS
    
    Input
    
    LinearizedArray
    
    MiniBabySet
    
    MiniVector
    
    PackedIntArray
    
    Path
    
    PriorityQueue
    
    Make
    
    RowDisplacementDecode
    
    StaticVersion
    
    TableFormat
    
    TABLES
    
    TableInterpreter
    
    Make
    
    Semv
    
    Production
    
    State
    
    Tops
    
    Make
- Sources
  - menhirGLR
    
    GLR.ml
    
    GLRAPI.ml
    
    GSS.ml
    
    Input.ml
    
    LinearizedArray.ml
    
    MiniBabySet.ml
    
    MiniVector.ml
    
    PackedIntArray.ml
    
    Path.ml
    
    PriorityQueue.ml
    
    RowDisplacementDecode.ml
    
    StaticVersion.ml
    
    TableFormat.ml
    
    TableInterpreter.ml
    
    Tops.ml
    
    menhirGLR.ml

Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file `Input.ml`

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
(******************************************************************************)
(*                                                                            *)
(*                                    Menhir                                  *)
(*                                                                            *)
(*   Copyright Inria. All rights reserved. This file is distributed under     *)
(*   the terms of the GNU Library General Public License version 2, with a    *)
(*   special exception on linking, as described in the file LICENSE.          *)
(*                                                                            *)
(******************************************************************************)

open Lexing
module V = MiniVector

type 'token input = {

  lexer: lexbuf -> 'token;
  (**The lexer. *)

  lexbuf: lexbuf;
  (**The lexing buffer. *)

  mutable lookahead_token: 'token;
  (**The first unconsumed token may or may not have been obtained already
     from the lexer. This field should have type ['token option]. To avoid
     allocating an option, we use the address of the [input] object itself
     as a sentinel value, standing for [None]. (Cute, eh?) (Danger!)
     We save 2 words of memory per token, and about 2% in time, it seems. *)

  startpos: position V.vector;
  (**A vector of the start positions of the tokens that we have consumed. *)

  endpos: position V.vector;
  (**A vector of the end positions of the tokens that we have consumed. *)

  initp: position;
  (**The initial position. This position is obtained from the lexing buffer
     at the very beginning. *)

}

let create lexer lexbuf =
  let lookahead_token = Obj.magic () (* overwritten below *)
  and startpos = V.create()
  and endpos = V.create()
  and initp = lexbuf.lex_curr_p in
  let input = { lexer; lexbuf; lookahead_token; startpos; endpos; initp } in
  input.lookahead_token <- Obj.magic input;
  input

let[@inline] consume input =
  (* It would be surprising if the parser would consume a token without
     having examined it first. (An LR parser will never do this.) So, we
     consider this situation an error. *)
  assert (input.lookahead_token != Obj.magic input);
  (* Push the positions of the current lookahead symbol (which we are about
     to consume) into the position vectors. *)
  V.push input.startpos input.lexbuf.lex_start_p;
  V.push input.endpos input.lexbuf.lex_curr_p;
  (* Record the fact that we have not yet read the next token. *)
  input.lookahead_token <- Obj.magic input

let[@inline] lookahead input =
  let token = input.lookahead_token in
  if token != Obj.magic input then
    token
  else
    let token = input.lexer input.lexbuf in
    input.lookahead_token <- token;
    token

(* -------------------------------------------------------------------------- *)

(* A note on keeping track of positions. *)

(* Every node in the GSS carries a date, that is, an offset into the input,
   measured in number of tokens. A date represents a position *between* two
   tokens. *)

(* It would make sense to give the user access to dates, perhaps via new
   keywords that could be used in the semantic actions. *)

(* For the moment, for historical reasons, the user does not have access to
   dates. Instead the user has access to Menhir's traditional position
   keywords, such as [$startpos], [$endpos], [$startpos(x)], etc. *)

(* There is an unfortunate mismatch between the concept of position and the
   concept of date. Indeed, because all positions are originally produced by
   the lexer, every position must be the start position or the end position of
   some token. Thus, in between two tokens, we have *two* possible positions:
   the end position of the first token, and the start position of the second
   token. This creates a problem in the translation of a date to a position:
   which of the two positions should be used? A somewhat sensible approach,
   which we adopt, is to use the end position of the first token if the date
   is supposed to represent an end date, and to the use the start position of
   the second token if the date is supposed to represent a start date. *)

(* This approach still has a potentially surprising property. The empty date
   interval [\[i, i)] is translated to an interval [\[pos1, pos2)] where the
   position [pos2] possibly lies leftwards of the position [pos1]. To avoid
   this problem, perhaps a good rule of thumb is to avoid using the keywords
   [$startpos] and [$endpos] inside an epsilon production, and to avoid using
   the keywords [$startpos(x)] and [$endpos(x)] when the symbol denoted by [x]
   is nullable. Another idea is to just be aware of this problem and exchange
   the positions [pos1] and [pos2] in this case. *)

(* In this module, the vectors [startpos] and [endpos] record the start
   position and end position of the input symbols that have been consumed.
   These vectors serve as a basis in the translation of dates to
   positions. *)

type date =
  int

let[@inline] end_position input date =
  let endpos = input.endpos in
  assert (0 <= date && date <= V.length endpos);
  if 0 < date then
    V.unsafe_get endpos (date-1)
  else
    (* This case is problematic, but should arise only if we are asked about
       the end position of an epsilon production or of a nullable symbol. *)
    (* In this case, instead of the end position of the token at date [-1],
       which does not exist, we return the initial position of the input
       stream. *)
    input.initp

let[@inline] start_position input date =
  let startpos = input.startpos in
  assert (0 <= date && date <= V.length startpos);
  if date < V.length startpos then
    V.unsafe_get startpos date
  else
    (* This case is problematic, but should arise only if we are asked about
       the start position of an epsilon production or of a nullable symbol. *)
    (* In this case, instead of the start position of the token at [date],
       which does not exist, we return the end position of the previous
       token. *)
    end_position input date

package menhirGLR

Source file Input.ml

Source file `Input.ml`