package morbig

  1. Overview
  2. Docs

Module Morbig.PrelexerStateSource

Sourcetype atom =
  1. | WordComponent of string * CST.word_component
  2. | QuotingMark of quote_kind
  3. | AssignmentMark
Sourceand quote_kind =
  1. | SingleQuote
  2. | DoubleQuote
  3. | OpeningBrace
Sourcetype lexing_context =
  1. | Default
  2. | AssignmentRHS of CST.name
Sourcetype prelexer_state = {
  1. lexing_context : lexing_context;
  2. nesting_context : Nesting.t list;
  3. buffer : atom list;
}
Sourceval initial_state : prelexer_state
Sourceval at_toplevel : prelexer_state -> bool
Sourceval enter_assignment_rhs : prelexer_state -> CST.name -> prelexer_state
Sourceval push_word_component : atom list -> (string * CST.word_component) -> atom list
Sourceval push_string : prelexer_state -> string -> prelexer_state
Sourceval parse_pattern : CST.word_component -> CST.word_component list
Sourceval push_character : prelexer_state -> char -> prelexer_state
Sourceval push_separated_string : prelexer_state -> string -> prelexer_state
Sourceval pop_character : atom list -> atom list
Sourceval push_word_closing_character : prelexer_state -> char -> prelexer_state

push_word_closing_character b c pushes the character c so that it becomes part of the string representing the current word literal, but without giving it any interpretation as a word CST. Typically, if the word is "$(1)", the string representing the current word is "$(1)", so the character ')' must be pushed as part of this string representation. However, ')' is already accounted for in the word CST WordSubshell (_, _) associated with this word, so we do not push ')' as a WordLiteral CST.

Sourceval string_of_word : CST.word -> string
Sourceval string_of_attribute : CST.variable_attribute -> string
Sourceval push_parameter : ?with_braces:bool -> ?attribute:CST.variable_attribute -> prelexer_state -> string -> prelexer_state
Sourceval string_of_atom : atom -> string
Sourceval contents_of_atom_list : atom list -> string
Sourceval string_of_atom_list : atom list -> string
Sourceval contents : prelexer_state -> string
Sourceval components_of_atom_list : atom list -> CST.word_component list
Sourceval components : prelexer_state -> CST.word_component list
Sourceval push_quoting_mark : quote_kind -> prelexer_state -> prelexer_state
Sourceval push_assignment_mark : prelexer_state -> prelexer_state
Sourceval is_assignment_mark : atom -> bool
Sourceval recognize_assignment : prelexer_state -> prelexer_state
Sourceval return : ?with_newline:bool -> Lexing.lexbuf -> prelexer_state -> Pretoken.t list -> (Pretoken.t * Lexing.position * Lexing.position) list

return ?with_newline lexbuf current tokens returns a list of pretokens consisting of, in this order:

  • WORD(w), where w is the contents of the buffer of current, if that buffer is non-empty;
  • all the elements of tokens;
  • NEWLINE, in case ?with_newline is true (default: false).

We know that tokens does not contain any Word pretokens. Indeed, the prelexer produces Word pretokens only from the contents it has collected in the buffer.

Sourceexception NotAWord of string
Sourceval word_of : (Pretoken.t * 'a * 'b) list -> CST.word
Sourceval located_word_of : (Pretoken.t * 'a * 'b) list -> CST.word * 'a * 'b
Sourceval escape_analysis : ?for_backquote:bool -> Nesting.t list -> prelexer_state -> int option

A double quote can be escaped if we are already inside (at least) two levels of quotation. For instance, if the input is <dquote> <dquote> <backslash><backslash> <dquote> <dquote> <dquote>, the escaped backslash is used to escape the quote character.

Sourceval escape_analysis_predicate : ?for_backquote:bool -> Nesting.t list -> prelexer_state -> bool
Sourceval escaped_backquote : prelexer_state -> bool
Sourceval escaped_single_quote : prelexer_state -> bool
Sourceval escaped_double_quote : prelexer_state -> bool
Sourceval nesting_context : prelexer_state -> Nesting.t list
Sourceval enter_double_quote : prelexer_state -> prelexer_state
Sourceval enter_here_document : bool -> string -> prelexer_state -> prelexer_state
Sourceval enter_braces : prelexer_state -> prelexer_state
Sourceval quit_double_quote : prelexer_state -> prelexer_state
Sourceval enter_backquotes : char -> int -> prelexer_state -> prelexer_state
Sourceval under_backquote : prelexer_state -> bool
Sourceval under_braces : prelexer_state -> bool
Sourceval under_backquoted_style_command_substitution : prelexer_state -> bool
Sourceval under_double_quote : prelexer_state -> bool
Sourceval under_real_double_quote : prelexer_state -> bool
Sourceval under_here_document : prelexer_state -> bool
Sourceval is_escaping_backslash : prelexer_state -> 'a -> char -> bool
Sourceval closest_backquote_depth : Nesting.t list -> int
Sourceval backquote_depth : prelexer_state -> int option
Sourceval found_current_here_document_delimiter : prelexer_state -> bool
Sourceval remove_contents_suffix : string -> string -> CST.word_cst -> string * CST.word_component list
Sourceval debug : ?rule:string -> Lexing.lexbuf -> prelexer_state -> unit