Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file lexeme_intf.ml
module type COMBI =
sig
  (** {1 Combinators} *)

  (** Subset of the combinators of the module {!module:Character.Make}.
      Detailed description see there. *)

  (* Parser combinator type; the result type is the parameter. *)
  type _ t

  val return: 'a -> 'a t

  val map: ('a -> 'b) -> 'a t -> 'b t

  val (let* ): 'a t -> ('a -> 'b t) -> 'b t

  val (</>): 'a t -> 'a t -> 'a t

  val (<?>): 'a t -> string -> 'a t

  val located: 'a t -> 'a Located.t t

  val unexpected: string -> 'a t

  val backtrack: 'a t -> string -> 'a t

  val char: char -> char t

  val charp: (char -> bool) -> string -> char t

  val one_of_chars: string -> string -> char t

  val string: string -> string t

  val optional: 'a t -> 'a option t

  val zero_or_more_fold_left:
    'r -> ('r -> 'a -> 'r t) -> 'a t -> 'r t

  val one_or_more_fold_left:
    ('a -> 'r t) -> ('r -> 'a -> 'r t) -> 'a t -> 'r t

  val skip_zero_or_more: 'a t -> int t

  val operator_expression:
    'exp t
    -> 'op t option
    -> 'op t
    -> ('op -> 'op -> bool t)
    -> ('op -> 'exp -> 'exp t)
    -> ('exp -> 'op -> 'exp -> 'exp t)
    -> 'exp t
end



(** Definition of Standard Tokens *)
module type LANG =
sig
  (** {1 Basic Definitions for a Language} *)

  val whitespace_chars: string
  (** Set of whitespace characters. A sequence of zero or more whitespace
      characters is stripped off after each token.

      Usually the set of whitespace characters is [" \t\n\r"] i.e. blanks,
      tabs, newline and carriage return are whitespace characters. *)

  val multiline_comment: (string * string * bool) option
  (** [multiline_start, multiline_end, nested]

      Optional definition of a multiline comment.

      Precondition: The start and end of a multiline comment must not be
      empty. *)

  val line_comment: string option
  (** Optional start of a line comment. The comment spans to the end of
      the line.

      Precondition: The start of a line comment must not be empty. *)

  val identifier_start: char -> bool
  (** Legal characters to start an identifier. *)

  val identifier_inner: char -> bool
  (** Legal characters in an identifier after the start character. *)

  val reserved_names: string list
  (** List of identifiers which are treated as reserved names (aka
      keywords). *)
end



module type MAKE =
  functor (Combi: COMBI) (Lang: LANG) ->
  sig
    open Combi

    (** {1 Lexeme Parsers}

        In a lexeme parser all tokens strip off whitespace which comes
        after the token. Therefore each combinator starts at a position in
        the input stream which does not start any whitespace.

        This is true for all tokens except the first token. Therefore the
        whitespace at the beginning of the stream has to be stripped off
        separately.

        All tokens in this module strip off the whitespace coming after
        them.

        If the parsing of any token fails, then an alternative combinator
        can take over at the start position of the failed token. The
        module does the necessary backtracking.

        If the user of the module adds own tokens, the tokens shall
        satisfy the same requirement: strip off all whitespace after the
        token; in case of failure of a multicharacter token, do the
        backtracking appropriately. *)


    (** {1 Basic Lexeme Support} *)

    val whitespace: int t
    (** Strip off any sequence of whitespace characters and comments.
        Return the number of characters stripped off. *)

    val whitespace_before: 'a t -> 'a t
    (** [whitespace_before p]

        Strip off whitespace and then continue parsing with [p]. *)

    val lexeme: 'a t -> 'a t
    (** [lexeme p]

        Convert [p] to a lexeme parser i.e. strip off any whitespace after
        successfully parsing with [p]. *)

    val token: string -> 'a t -> 'a t
    (** [token expect p]

        Convert a token parser [p] which does not adhere to the
        conventions of a lexeme parser (i.e. no whitespace stripped after
        the token, no backtracking in case of failure) into a parser which
        respects the conventions.

        The string [expect] describes the expected token. It might appear
        in error messages. *)


    (** {1 Token}

        All whitespace after tokens is stripped off. If a token fails,
        then the next alternative can be checked at the start position of
        the token. No [backtrack] is necessary. *)

    val semicol: char t

    val comma: char t

    val colon: char t

    val dot: char t

    val string: string -> string Located.t t

    val unsigned_int: int Located.t t

    val int: int Located.t t

    val float: float Located.t t

    val identifier: string Located.t t

    val reserved: string -> string Located.t t

    (*val char_literal: char t
      val string_literal: string t*)


    (** {1 Parenthesized Structures}

        Note that all combinators parsing the inner part of the
        parentheses have the type [unit -> 'a t]. They are called only if
        the opening parenthesis has been parsed successfully. This makes
        it possible to use the parenthesized structures recursively. *)

    val parens: (unit -> 'a t) -> 'a t

    val braces: (unit -> 'a t) -> 'a t

    val brackets: (unit -> 'a t) -> 'a t

    val angulars: (unit -> 'a t) -> 'a t

    (*val semicol_separated0: 'a t -> 'a list t
      val semicol_separated1: 'a t -> 'a list t
      val comma_separated0: 'a t -> 'a list t
      val comma_separated1: 'a t -> 'a list t*)


    (** {1 Operator Expressions } *)

    type assoc =
      | Left
      | Right
    (** Associativity of an operator *)

    type 'e unary_operation =
      Position.range -> 'e Located.t -> 'e t
    (** A unary operation is a function, mapping the position of the
        operator and a located operand into the result. Note that the
        result is not located. The library computes the location. *)

    type 'e binary_operation =
      'e Located.t -> Position.range -> 'e Located.t -> 'e t
    (** A binary operation is a function, mapping a located left operand,
        the position of the operator and a located right operand into the
        result. Note that the result is not located. The library computes
        the location. *)

    type 'e operation =
      | Unary of 'e unary_operation
      | Binary of 'e binary_operation
      | Both of 'e unary_operation * 'e binary_operation

    type 'e operator_table =
      (string * assoc * 'e operation) list list
    (** An operator table describes the precedence, associativity and the
        semantics of operators. It is a list of operators where each entry
        in the list is a list of operators at the same precedence level.
        The precedences are descending i.e. the first entry in the list
        are the operators with the highest precedence.

        The following example describes the addition, multiplication and
        exponentiation operators for floating point arithmetic. The
        addition operators [+] and [-] are at the lowest precedence and
        the exponentiation operator [^] has the highest precedence.

        {[
            [
                [ "^", Right, Binary (lift_binary ( ** ))]
                ;
                [ ("*", Left, Binary (lift_binary ( *. )));
                  ("/", Left, Binary (lift_binary ( /. ))) ]
                ;
                [ ("+", Left, Both (lift_unary (~+.), lift_binary (+.)));
                  ("-", Left, Both (lift_unary (~-.), lift_binary (-.))) ]
            ]
        ]} *)

    val lift_unary: ('e -> 'e) -> 'e unary_operation
    (** [lift_unary f]

        Lift the function [f] doing the unary operation into an
        ['e unary_operation] ignoring the location information. *)

    val lift_binary: ('e -> 'e -> 'e) -> 'e binary_operation
    (** [lift_binary f]

        Lift the function [f] doing the binary operation into an
        ['e binary_operation] ignoring the location information. *)

    val expression:
      string t
      -> ((unit -> 'e Located.t t) -> 'e Located.t t)
      -> 'e operator_table
      -> 'e Located.t t
    (** [expression operator primary table]

        Make a parser for operator expressions with the following
        arguments:

        - [operator] Parsing combinator of an operator. The combinator
          need not respect the conventions for a lexeme parser. The
          function handles stripping off whitespace and backtracking
          properly.

        - [primary] Parsing combinator for primary expressions. Primary
          expressions are either tokens (numbers, variables, ...) or more
          complex expressions which are treated as atomic expressions
          (e.g. parenthesized expressions, function calls, ...). The
          [primary] combinator can use the generated expression parser
          recursively after consuming at least one character. This
          convention is necessary to avoid unbounded recursion.

        - [table] Table describing the operators.

        Example: For a parser computing floating point expressions use the
        following:

        {[
            let operator: string t =
                one_of_chars "+-*/^" "One of the operators [+,-,*,/,^]"
                |> map (String.make 1)
            in
            let primary (expr: unit -> float Located.t t)
                : float Located.t t
                =
                float
                </>
                parens expr
            in
            expression operator primary table
        ]}

        where [table] is the operator table described above in the
        description of {!type:operator_table}. *)
  end