morbig

A trustworthy parser for POSIX shell
(** Items accumulated in the prelexer buffer: the [buffer] field of
    [prelexer_state] is an [atom list]. *)
type atom =
(* A string paired with a CST word component.
   NOTE(review): presumably the source text of the component together
   with its parsed form -- confirm against the implementation. *)
| WordComponent of string * CST.word_component
(* A marker carrying a [quote_kind]; see [push_quoting_mark] and
   [pop_quotation]. *)
| QuotingMark of quote_kind
(* A marker without payload; see [push_assignment_mark] and
   [is_assignment_mark]. *)
| AssignmentMark
(* The kinds of quotation that a [QuotingMark] atom can carry. *)
and quote_kind =
| SingleQuote
| DoubleQuote
| OpeningBrace
(** Lexing mode stored in the [lexing_context] field of
    [prelexer_state]. *)
type lexing_context =
(* Mode without any attached data. *)
| Default
(* Mode carrying a [CST.name].
   NOTE(review): presumably the name whose assignment right-hand side
   is being lexed (cf. [enter_assignment_rhs]) -- confirm. *)
| AssignmentRHS of CST.name
(** The state threaded through the prelexer functions declared in this
    interface: most of them take a [prelexer_state] and return a new
    one. *)
type prelexer_state = {
(* Current lexing mode, see [lexing_context]. *)
lexing_context : lexing_context;
(* Stack of [Nesting.t] entries; see the enter_*, quit_* and under_*
   functions.
   NOTE(review): presumably innermost nesting first -- confirm. *)
nesting_context : Nesting.t list;
(* Atoms collected so far for the word being built.
   NOTE(review): the ordering of the list (most recent first or last)
   is not visible from this interface -- confirm. *)
buffer : atom list;
}
(* The starting prelexer state. Its field values are not visible from
   this interface. *)
val initial_state : prelexer_state
(* Predicate on a state.
   NOTE(review): presumably true when [nesting_context] is empty --
   confirm. *)
val at_toplevel : prelexer_state -> bool
(* Returns a state computed from a state and a [CST.name].
   NOTE(review): presumably sets [lexing_context] to
   [AssignmentRHS name] -- confirm. *)
val enter_assignment_rhs : prelexer_state -> CST.name -> prelexer_state
(* Operates on a bare atom list rather than on a whole state.
   NOTE(review): presumably adds a [WordComponent] atom built from the
   given pair -- confirm. *)
val push_word_component : atom list -> (string * CST.word_component) -> atom list
(* Returns a state computed from a state and a string.
   NOTE(review): presumably records the string in [buffer] -- confirm. *)
val push_string : prelexer_state -> string -> prelexer_state
(* Maps one word component to a list of word components. The splitting
   rule is not visible from this interface. *)
val parse_pattern : CST.word_component -> CST.word_component list
(* Character counterpart of [push_string], judging by the types.
   NOTE(review): confirm that both record their argument the same way. *)
val push_character : prelexer_state -> char -> prelexer_state
(* Same type as [push_string].
   NOTE(review): how it differs from [push_string] is not visible here;
   presumably the string is kept apart from the preceding atom --
   confirm. *)
val push_separated_string : prelexer_state -> string -> prelexer_state
(* Operates on a bare atom list.
   NOTE(review): presumably removes the most recently pushed character
   -- confirm, including the behaviour on an empty list. *)
val pop_character : atom list -> atom list
(* Described by the paragraph that follows this declaration. *)
val push_word_closing_character : prelexer_state -> char -> prelexer_state

push_word_closing_character b c pushes the character c so that it is part of the string representing the current word literal, but with no interpretation as a word CST. Typically, if the word is "$(1)", the string representing the current word is "$(1)", so the character ')' must be pushed as part of this string representation. However, ')' is already taken care of by the word CST WordSubshell (_, _) associated with this word, so we do not push ')' as a WordLiteral CST.

(* String rendering of a [CST.word]. *)
val string_of_word : CST.word -> string
(* String rendering of a [CST.variable_attribute]. *)
val string_of_attribute : CST.variable_attribute -> string
(* Returns a state computed from a state and a string, with two
   optional arguments.
   NOTE(review): presumably the string is a parameter name,
   [with_braces] tells whether it was written between braces, and
   [attribute] is the attribute attached to it; the defaults are not
   visible here -- confirm. *)
val push_parameter : ?with_braces:bool -> ?attribute:CST.variable_attribute -> prelexer_state -> string -> prelexer_state
(* String rendering of a single atom. *)
val string_of_atom : atom -> string
(* Builds one string from an atom list.
   NOTE(review): presumably the concatenated textual contents of the
   atoms, as opposed to the rendering of [string_of_atom_list] --
   confirm. *)
val contents_of_atom_list : atom list -> string
(* Builds one string from an atom list.
   NOTE(review): presumably a printable rendering based on
   [string_of_atom] -- confirm. *)
val string_of_atom_list : atom list -> string
(* State-level counterpart of [contents_of_atom_list], judging by the
   types.
   NOTE(review): presumably applied to the [buffer] field -- confirm. *)
val contents : prelexer_state -> string
(* Extracts CST word components from an atom list.
   NOTE(review): what happens to [QuotingMark] and [AssignmentMark]
   atoms is not visible here -- confirm. *)
val components_of_atom_list : atom list -> CST.word_component list
(* State-level counterpart of [components_of_atom_list], judging by the
   types.
   NOTE(review): presumably applied to the [buffer] field -- confirm. *)
val components : prelexer_state -> CST.word_component list
(* Returns a state computed from a quote kind and a state.
   NOTE(review): presumably adds a [QuotingMark] atom of that kind to
   [buffer] -- confirm. *)
val push_quoting_mark : quote_kind -> prelexer_state -> prelexer_state
(* Returns a state computed from a quote kind and a state.
   NOTE(review): presumably closes the quotation opened by the matching
   [QuotingMark] -- confirm. *)
val pop_quotation : quote_kind -> prelexer_state -> prelexer_state
(* Returns a state computed from a state.
   NOTE(review): presumably adds an [AssignmentMark] atom to [buffer]
   -- confirm. *)
val push_assignment_mark : prelexer_state -> prelexer_state
(* Predicate on an atom.
   NOTE(review): presumably true exactly for [AssignmentMark] --
   confirm. *)
val is_assignment_mark : atom -> bool
(* Returns a state computed from a state.
   NOTE(review): the recognition rule is not visible here; presumably
   it relies on [AssignmentMark] atoms in [buffer] -- confirm. *)
val recognize_assignment : prelexer_state -> prelexer_state
(* Builds the list of pretokens to emit, each paired with two
   [Lexing.position] values: first a WORD made from the buffer of the
   given state when that buffer is non-empty, then the given tokens,
   then NEWLINE when [with_newline] is true (default: false). See the
   paragraph that follows this declaration.
   NOTE(review): the positions are presumably taken from the lexbuf
   argument -- confirm. *)
val return : ?with_newline:bool -> Lexing.lexbuf -> prelexer_state -> Pretoken.t list -> (Pretoken.t * Lexing.position * Lexing.position) list

(return ?with_newline lexbuf current tokens) returns a list of pretokens consisting of, in that order:

  • WORD(w), where w is the contents of the buffer current in case the buffer current is non-empty;
  • all the elements of tokens;
  • NEWLINE, in case ?with_newline is true (default: false).

We know that tokens does not contain any Word pretokens. In fact, the prelexer produces Word pretokens only from contents it has collected in the buffer.

(* Exception carrying a string.
   NOTE(review): presumably raised by [word_of] and [located_word_of]
   when their argument is not a single word pretoken -- confirm. *)
exception NotAWord of string
(* Extracts a [CST.word] from a list of located pretokens; the two
   location components of each triple are polymorphic.
   NOTE(review): presumably raises [NotAWord] on other inputs --
   confirm. *)
val word_of : (Pretoken.t * 'a * 'b) list -> CST.word
(* Like [word_of], judging by the types, but the result is a triple.
   NOTE(review): the second and third components of the result use
   type variables that do not occur in the argument type, so they are
   not tied to the input locations by this signature; this looks
   unintended -- confirm against the implementation. *)
val located_word_of : (Pretoken.t * 'a * 'b) list -> CST.word * 'c * 'd
(* Analyses escaping from a nesting context and a state and returns an
   optional integer. See the paragraph that follows this declaration
   for the double-quote case.
   NOTE(review): the meaning of the integer and of [for_backquote] is
   not visible here -- confirm. *)
val escape_analysis : ?for_backquote:bool -> Nesting.t list -> prelexer_state -> int option

A double quote can be escaped if we are already inside (at least) two levels of quotation. For instance, if the input is <dquote> <dquote> <backslash><backslash> <dquote> <dquote> <dquote>, the escaped backslash is used to escape the quote character.

(* Boolean counterpart of [escape_analysis]: same arguments, [bool]
   result.
   NOTE(review): presumably true when [escape_analysis] returns
   [Some _] -- confirm. *)
val escape_analysis_predicate : ?for_backquote:bool -> Nesting.t list -> prelexer_state -> bool
(* The three predicates below take only a state.
   NOTE(review): presumably each tells whether the character about to
   be read (backquote, single quote, double quote) is escaped according
   to [buffer] and [nesting_context] -- confirm. *)
val escaped_backquote : prelexer_state -> bool
val escaped_single_quote : prelexer_state -> bool
val escaped_double_quote : prelexer_state -> bool
(* Returns a [Nesting.t list] for a state.
   NOTE(review): presumably the [nesting_context] field -- confirm. *)
val nesting_context : prelexer_state -> Nesting.t list
(* The enter_* and quit_* functions below return a state computed from
   a state.
   NOTE(review): presumably enter_* pushes an entry onto
   [nesting_context] and quit_* removes it -- confirm. *)
val enter_double_quote : prelexer_state -> prelexer_state
(* NOTE(review): the meaning of the bool and string arguments is not
   visible here; presumably a flag and the here-document delimiter --
   confirm. *)
val enter_here_document : bool -> string -> prelexer_state -> prelexer_state
val enter_braces : prelexer_state -> prelexer_state
val quit_double_quote : prelexer_state -> prelexer_state
val quit_braces : prelexer_state -> prelexer_state
(* NOTE(review): the meaning of the char and int arguments is not
   visible here; the int is presumably a backquote depth (cf.
   [backquote_depth]) -- confirm. *)
val enter_backquotes : char -> int -> prelexer_state -> prelexer_state
(* The under_* predicates below take only a state.
   NOTE(review): presumably each tells whether the state is currently
   nested inside the named construct; how [under_double_quote] differs
   from [under_real_double_quote] is not visible here -- confirm. *)
val under_backquote : prelexer_state -> bool
val under_braces : prelexer_state -> bool
val under_backquoted_style_command_substitution : prelexer_state -> bool
val under_double_quote : prelexer_state -> bool
val under_real_double_quote : prelexer_state -> bool
val under_here_document : prelexer_state -> bool
(* Predicate on a state, a value of any type, and a character.
   NOTE(review): the second argument is fully polymorphic, which
   suggests that the implementation does not inspect it -- confirm. *)
val is_escaping_backslash : prelexer_state -> 'a -> char -> bool
(* Computes an integer from a nesting context.
   NOTE(review): the value returned when the context contains no
   backquote is not visible here -- confirm. *)
val closest_backquote_depth : Nesting.t list -> int
(* Optional integer computed from a state.
   NOTE(review): presumably [None] when the state is not under
   backquotes -- confirm. *)
val backquote_depth : prelexer_state -> int option
(* Predicate on a state.
   NOTE(review): presumably compares the buffer contents with the
   delimiter given to [enter_here_document] -- confirm. *)
val found_current_here_document_delimiter : prelexer_state -> bool
(* Takes two strings and a [CST.word_cst], returns a string and a list
   of word components.
   NOTE(review): which string is the suffix and which is the contents
   is not visible here -- confirm against the implementation. *)
val remove_contents_suffix : string -> string -> CST.word_cst -> string * CST.word_component list
(* Debugging aid returning [unit].
   NOTE(review): the output destination and the use of [rule] are not
   visible here -- confirm. *)
val debug : ?rule:string -> Lexing.lexbuf -> prelexer_state -> unit