Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file parsing_hacks_js.ml
(* Yoann Padioleau
*
* Copyright (C) 2010, 2013 Facebook
* Copyright (C) 2019 Yoann Padioleau
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleFlag=Flag_parsingmodulePI=Parse_infomoduleAst=Cst_jsmoduleT=Parser_jsmoduleTH=Token_helpers_jsmoduleF=Ast_fuzzy(*****************************************************************************)(* Prelude *)(*****************************************************************************)(* The goal for this module is to retag tokens
* (e.g., a T_LPAREN in T_LPAREN_ARROW)
* or insert tokens (e.g., T_VIRTUAL_SEMICOLON) to
* help the grammar remain simple and unambiguous. See
* lang_cpp/parsing/parsing_hacks.ml for more information about
* this technique.
*
* This module inserts fake virtual semicolons, which is known as
* Automatic Semicolon Insertion, or ASI for short.
* Those semicolons can be omitted by the user (but really should not).
* ASI works in two steps:
* - certain tokens can not be followed by a newline (e.g., continue)
* and we detect those tokens in this file.
* - we also insert semicolons during error recovery in parser_js.ml. After
* all, that is what the spec says.
* Note that we need both techniques. See parse_js.ml comment for
* the limitations of using just the second technique.
*
* reference:
* -http://www.bradoncode.com/blog/2015/08/26/javascript-semi-colon-insertion
* -http://www.ecma-international.org/ecma-262/6.0/index.html#sec-automatic-semicolon-insertion
*)

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* obsolete
 *
 * [is_toplevel_keyword tok] is [true] exactly when [tok] is one of the
 * import, export, var, let, const or function keyword tokens, and
 * [false] for every other token. Only used by the obsolete ASI heuristics.
 *)
let is_toplevel_keyword = function
  | T.T_IMPORT _ | T.T_EXPORT _
  | T.T_VAR _ | T.T_LET _ | T.T_CONST _
  | T.T_FUNCTION _
    -> true
  | _ -> false

(* obsolete
 *
 * [rparens_of_if toks] returns the infos of the T_RPAREN tokens whose
 * matching T_LPAREN was immediately preceded by a T_IF token.
 * Comment tokens are filtered out first, so the token before a paren is
 * the previous non-comment token.
 *
 * The matching is done with a stack: each T_LPAREN pushes the token that
 * precedes it (as an option, as given by Common2.iter_with_previous_opt),
 * and each T_RPAREN pops one entry. A T_RPAREN seen while the stack is
 * empty (unbalanced input) is silently ignored.
 *)
let rparens_of_if toks =
  let toks = Common.exclude TH.is_comment toks in

  (* tokens that preceded the currently open left parens *)
  let stack = ref [] in
  (* result accumulator: infos of the closing parens of ifs *)
  let rparens_if = ref [] in

  toks +> Common2.iter_with_previous_opt (fun prev x ->
    (match x with
    | T.T_LPAREN _ ->
        Common.push prev stack;
    | T.T_RPAREN info ->
        if !stack <> [] then begin
          let top = Common2.pop2 stack in
          (match top with
          | Some (T.T_IF _) ->
              Common.push info rparens_if
          | _ ->
              ()
          )
        end
    | _ -> ()
    )
  );
  !rparens_if

(*****************************************************************************)
(* Entry point *)
(*****************************************************************************)

(* retagging:
 * - '(' when part of an arrow expression
 * - less: '<' when part of a polymorphic type (aka generic)
 * - less: { when part of a pattern before an assignment
*)

(* [fix_tokens toks] returns [toks] with some tokens retagged.
 *
 * The tokens are first grouped into fuzzy trees (Parse_fuzzy.mk_trees),
 * then a visitor records in two hash tables:
 *  - the first info of every parenthesized group directly followed by
 *    a => token;
 *  - the info of every import token directly followed by a parenthesized
 *    group (presumably a dynamic import call -- TODO confirm).
 * Finally the token list is mapped: a T_LPAREN whose info was recorded
 * becomes a T_LPAREN_ARROW, and a T_IMPORT whose info was recorded becomes
 * a T_IDENTIFIER carrying the original string. Every other token is kept.
 *
 * If Parse_fuzzy raises Unclosed, the tokens are returned unchanged when
 * Flag.error_recovery is set; otherwise the error is re-raised as
 * Lexer_js.Lexical_error with the same message and info.
 *)
let fix_tokens toks =
  try
    let trees =
      Parse_fuzzy.mk_trees { Parse_fuzzy.
        tokf = TH.info_of_tok;
        kind = TH.token_kind_of_tok;
      } toks
    in
    let retag_lparen = Hashtbl.create 101 in
    let retag_keywords = Hashtbl.create 101 in

    (* visit and tag *)
    let visitor = Ast_fuzzy.mk_visitor { Ast_fuzzy.default_visitor with
      Ast_fuzzy.ktrees = (fun (k, _) xs ->
        (match xs with
        | F.Parens (i1, _, _)::F.Tok ("=>", _)::_res ->
            Hashtbl.add retag_lparen i1 true
        (* TODO: also handle typed arrows! *)
        | F.Tok ("import", i1)::F.Parens _::_res ->
            Hashtbl.add retag_keywords i1 true
        | _ -> ()
        );
        (* always continue the traversal into the sub-trees *)
        k xs
      )
    }
    in
    visitor trees;

    (* use the tagged information and transform tokens *)
    toks |> List.map (function
      | T.T_LPAREN info when Hashtbl.mem retag_lparen info ->
          T.T_LPAREN_ARROW (info)
      | T.T_IMPORT info when Hashtbl.mem retag_keywords info ->
          T.T_IDENTIFIER (PI.str_of_info info, info)
      | x -> x
    )

  with Parse_fuzzy.Unclosed (msg, info) ->
    if !Flag.error_recovery
    then toks
    else raise (Lexer_js.Lexical_error (msg, info))

(*****************************************************************************)
(* ASI (Automatic Semicolon Insertion) part 1 *)
(*****************************************************************************)

(* [fix_tokens_ASI xs] returns [xs] with T_VIRTUAL_SEMICOLON tokens
 * inserted in front of some tokens:
 *  - any token that is on a different line than a preceding
 *    continue or break token;
 *  - a ++ or -- token that is on a different line than a preceding
 *    identifier, false or true token.
 * Comment tokens are copied through and are not considered when looking
 * for the preceding token. An empty list yields an empty list.
 *
 * The inserted token carries a fake info built with Ast.fakeInfoAttach
 * from the info of the token it is inserted before.
 *)
let fix_tokens_ASI xs =

  (* output tokens, accumulated in reverse order (see List.rev at the end) *)
  let res = ref [] in

  (* Walk the tokens, calling [f prev tok] on each non-comment token where
   * [prev] is the previous non-comment token. Comments go straight to
   * [res] and do not update [prev].
   *)
  let rec aux prev f xs =
    match xs with
    | [] -> ()
    | e::l ->
        if TH.is_comment e
        then begin
          Common.push e res;
          aux prev f l
        end else begin
          f prev e;
          aux e f l
        end
  in

  let push_sc_before_x x =
    let fake = Ast.fakeInfoAttach (TH.info_of_tok x) in
    Common.push (T.T_VIRTUAL_SEMICOLON fake) res
  in

  let f = (fun prev x ->
    (match prev, x with
    | (T.T_CONTINUE _ | T.T_BREAK _), _
      when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x
    (* very conservative; should be any last(left_hand_side_expression)
     * but for that better to rely on ASI via parse-error recovery;
     * no ambiguity like for continue because
     *    if(true) x
     *    ++y;
     * is not valid.
     *)
    | (T.T_IDENTIFIER _ | T.T_FALSE _ | T.T_TRUE _),
      (T.T_INCR _ | T.T_DECR _)
      when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x
    | _ -> ()
    );
    (* the current token is always emitted, after the optional semicolon *)
    Common.push x res
  ) in

  (* obsolete: only needed by _fobsolete below. Kept lazy so that the extra
   * pass over the tokens is not paid on every call while that code is
   * unused.
   *)
  let hrparens_if =
    lazy (Common.hashset_of_list (rparens_of_if xs)) in

  (* history: this had too many false positives, which forced us
   * to rewrite the grammar to add extra virtual semicolons, which
   * then made the whole thing worse
   *)
  let _fobsolete = (fun prev x ->
    match prev, x with
    (* { } or ; } TODO: source of many issues *)
    | (T.T_LCURLY _ | T.T_SEMICOLON _), T.T_RCURLY _ ->
        Common.push x res;
    (* <not } or ;> } *)
    | _, T.T_RCURLY _ ->
        push_sc_before_x x;
        Common.push x res;

    (* ; EOF *)
    | (T.T_SEMICOLON _), T.EOF _ ->
        Common.push x res;
    (* <not ;> EOF *)
    | _, T.EOF _ ->
        push_sc_before_x x;
        Common.push x res;

    (* }
     * <keyword>
     *)
    | T.T_RCURLY _,
      (T.T_IDENTIFIER _
      | T.T_IF _ | T.T_SWITCH _ | T.T_FOR _
      | T.T_VAR _ | T.T_FUNCTION _ | T.T_LET _ | T.T_CONST _
      | T.T_RETURN _
      | T.T_BREAK _ | T.T_CONTINUE _
      (* todo: sure? *)
      | T.T_THIS _ | T.T_NEW _
      ) when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x;
        Common.push x res

    (* )
     * <keyword>
     *)
    (* this is valid only if the RPAREN is not the closing paren of an if *)
    | T.T_RPAREN info,
      (T.T_VAR _ | T.T_IF _ | T.T_THIS _ | T.T_FOR _ | T.T_RETURN _
      | T.T_IDENTIFIER _ | T.T_CONTINUE _
      ) when TH.line_of_tok x <> TH.line_of_tok prev
             && not (Hashtbl.mem (Lazy.force hrparens_if) info) ->
        push_sc_before_x x;
        Common.push x res;

    (* ]
     * <keyword>
     *)
    | T.T_RBRACKET _,
      (T.T_FOR _ | T.T_IF _ | T.T_VAR _ | T.T_IDENTIFIER _)
      when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x;
        Common.push x res;

    (* <literal>
     * <keyword>
     *)
    | (T.T_IDENTIFIER _
      | T.T_NULL _ | T.T_STRING _ | T.T_REGEX _
      | T.T_FALSE _ | T.T_TRUE _
      ),
      (T.T_VAR _ | T.T_IDENTIFIER _ | T.T_IF _ | T.T_THIS _
      | T.T_RETURN _ | T.T_BREAK _ | T.T_ELSE _
      ) when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x;
        Common.push x res;

    (* } or ; or , or =
     * <keyword> col 0
     *)
    | (T.T_RCURLY _ | T.T_SEMICOLON _ | T.T_COMMA _ | T.T_ASSIGN _),
      _
      when is_toplevel_keyword x
           && TH.line_of_tok x <> TH.line_of_tok prev
           && TH.col_of_tok x = 0 ->
        Common.push x res;

    (* <no ; or }>
     * <keyword> col 0
     *)
    | _, _
      when is_toplevel_keyword x
           && TH.line_of_tok x <> TH.line_of_tok prev
           && TH.col_of_tok x = 0 ->
        push_sc_before_x x;
        Common.push x res;

    (* else *)
    | _, _ ->
        Common.push x res;
  ) in

  match xs with
  | [] -> []
  | x::_ ->
      (* The first real token needs a previous token: use a fake
       * semicolon, which matches none of the insertion rules in f.
       *)
      let sentinel =
        let fake = Ast.fakeInfoAttach (TH.info_of_tok x) in
        (T.T_SEMICOLON fake)
      in
      aux sentinel f xs;
      List.rev !res
*)|_,_whenis_toplevel_keywordx&&TH.line_of_tokx<>TH.line_of_tokprev&&TH.col_of_tokx=0->push_sc_before_xx;Common.pushxres;(* else *)|_,_->Common.pushxres;)inmatchxswith|[]->[]|x::_->letsentinel=letfake=Ast.fakeInfoAttach(TH.info_of_tokx)in(T.T_SEMICOLONfake)inauxsentinelfxs;List.rev!res