Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file token_views_context.ml
(* Yoann Padioleau
*
* Copyright (C) 2014 Facebook
* Copyright (C) 2002-2008 Yoann Padioleau
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)

open Common

open Parser_cpp
open Token_views_cpp

module TH = Token_helpers_cpp
module TV = Token_views_cpp

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)

(*****************************************************************************)
(* Argument vs Parameter *)
(*****************************************************************************)

(* Heuristic telling whether the token groups [xs] look like the arguments
 * of a call. [_tok_before] is accepted but never inspected.
 *
 * The answer is true when either
 *  - one of the comma-separated chunks of [xs] is exactly a star token
 *    followed by an identifier or by a parenthesized group, or
 *  - a left-to-right scan of [xs] meets an expression-only token
 *    (literal, this, new, operator, ...) before it meets an assignment
 *    sign or a bracket pair that could belong to a type declaration.
 *)
let look_like_argument _tok_before xs =
  (* normalize for C++: every TAnd token is turned into a TMul token so
   * that the patterns below only have to mention TMul
   *)
  let star_of_amp = function
    | Tok ({t = TAnd ii; _} as record) -> Tok ({record with t = TMul ii})
    | other -> other
  in
  let xs = List.map star_of_amp xs in

  (* split by comma so that a pattern can describe one full argument *)
  let chunks = split_comma xs in

  (* true when one whole argument is a dereference *)
  let whole_arg_is_deref = function
    (* *xx    (note: actually can also be a function pointer decl) *)
    | [Tok {t = TMul _; _}; Tok {t = TIdent _; _}] -> true
    (* *(xx) *)
    | [Tok {t = TMul _; _}; Parens _] -> true
    (* TODO: xx * yy  and space = 1 between the 2 :) *)
    | _ -> false
  in

  (* scans the flat list; the first decisive token wins *)
  let rec has_expr_token = function
    | [] -> false

    (* a function call probably *)
    | Tok {t = TIdent _; _} :: Parens _ :: _ ->
        (* todo? look_like_argument recursively in Parens || aux xs ? *)
        true

    (* if have = ... then must stop, could be default parameter of a method *)
    | Tok {t = TEq _; _} :: _ -> false

    (* could be part of a type declaration *)
    | Tok {t = TOCro _; _} :: Tok {t = TCCro _; _} :: _
    | Tok {t = TOCro _; _} :: Tok {t = (TInt _ | TIdent _); _}
      :: Tok {t = TCCro _; _} :: _ ->
        false

    (* tokens that only make sense inside an expression *)
    | Tok {t = ( TInt _ | TFloat _ | TChar _ | TString _
               | Ttrue _ | Tfalse _
               | Tthis _
               | Tnew _ ); _} :: _ ->
        true
    | Tok {t = tok; _} :: _ when TH.is_binary_operator_except_star tok ->
        true
    | Tok {t = ( TInc _ | TDec _
               | TDot _ | TPtrOp _ | TPtrOpStar _ | TDotStar _
               | TOCro _
               | TWhy _ | TBang _ ); _} :: _ ->
        true

    (* nothing decisive yet, keep scanning *)
    | _ :: rest -> has_expr_token rest
  in
  (* todo? what if they contradict each other? if one say arg and
   * the other a parameter?
   *)
  List.exists whole_arg_is_deref chunks || has_expr_token xs

(* True when the identifier [s] is taken to be a typedef name: either the
 * regexp test on the _t suffix succeeds (through the =~ operator that is
 * in scope) or [s] is one of a few hard-coded names.
 *)
let look_like_typedef s =
  s =~ ".*_t$"
  || List.mem s ["ulong"; "uchar"; "uvlong"; "vlong"; "uintptr"]
  (* plan9, but actually some fp such as Paddr which is actually a macro *)
  (* || s =~ "[A-Z][a-z].*$" *)
  (* with DECLARE_BOOST_TYPE, but have some false positives
   * when people do xx* indexPtr = const_cast<>(indexPtr);
   *)
  (* s =~ ".*Ptr$" *)
  (* || s = "StringPiece" *)

(* todo: pass1, look for const, etc
* todo: pass2, look xx_t, xx&, xx*, xx**, see heuristics in typedef
*
* Many patterns should mimic some heuristics in parsing_hack_typedef.ml
*)

(* Heuristic telling whether the token groups [xs] look like the
 * parameters of a declaration. [tok_before] is the group that precedes
 * the parenthesis; it is only consulted for the ambiguous
 * ident-star-ident shape.
 *
 * The answer is true when either
 *  - one of the comma-separated chunks of [xs] has one of the type-like
 *    shapes listed in [whole_param_is_type], or
 *  - a left-to-right scan of [xs] finds two identifiers in a row, a basic
 *    type, a const/volatile qualifier or a struct/union/enum/class
 *    keyword.
 *)
let look_like_parameter tok_before xs =
  (* normalize for C++: every TAnd token is turned into a TMul token so
   * that the patterns below only have to mention TMul
   *)
  let star_of_amp = function
    | Tok ({t = TAnd ii; _} as record) -> Tok ({record with t = TMul ii})
    | other -> other
  in
  let xs = List.map star_of_amp xs in
  let chunks = split_comma xs in

  (* true when one whole parameter is shaped like a type *)
  let whole_param_is_type = function
    (* xx_t *)
    | [Tok {t = TIdent (s, _); _}] when look_like_typedef s -> true
    (* xx* *)
    | [Tok {t = TIdent _; _}; Tok {t = TMul _; _}]
    (* xx** *)
    | [Tok {t = TIdent _; _}; Tok {t = TMul _; _}; Tok {t = TMul _; _}] ->
        true

    (* xx * y  could be multiplication (or xx & yy) ..
     * todo: could look if space around :) but because of the
     *  filtering of template and qualifier the no_space_between
     *  may not be completely accurate here. May need lower level access
     *  to the list of TCommentSpace and their position.
     *  hmm but can look at col?
     *
     * C-s for parameter_decl in grammar to see that catch() is
     * a InParameter.
     *)
    | [Tok {t = TIdent _; _}; Tok {t = TMul _; _}; Tok {t = TIdent _; _}] ->
        (match tok_before with
         | Tok {t = ( Tcatch _
                    (* ugly: TIdent_Constructor interaction between past
                     * heuristics *)
                    | TIdent_Constructor _
                    | Toperator _
                    (* no! | TIdent _ *)
                    ); _} ->
             true
         | _ -> false)

    | _ -> false
  in

  (* scans the flat list; the first decisive token wins *)
  let rec has_decl_token = function
    | [] -> false
    (* xx yy *)
    | Tok {t = TIdent _; _} :: Tok {t = TIdent _; _} :: _ -> true
    | Tok {t = tok; _} :: _ when TH.is_basic_type tok -> true
    | Tok {t = ( Tconst _ | Tvolatile _
               | Tstruct _ | Tunion _ | Tenum _ | Tclass _ ); _} :: _ ->
        true
    (* nothing decisive yet, keep scanning *)
    | _ :: rest -> has_decl_token rest
  in
  List.exists whole_param_is_type chunks || has_decl_token xs

(*****************************************************************************)
(* Main heuristics *)
(*****************************************************************************)

(*
* Most of the important contexts are introduced via some '{' '}'. To
* disambiguate, it is often enough to just look at a few tokens before the
* '{'.
*
* Below we assume a view without:
* - comments
* - cpp directives
*
* todo
* - handle more C++ (right now I did it mostly to be able to parse plan9)
* - harder now that have c++, can have function inside struct so need
* handle all together.
* - change token but do not recurse in
* nested Braceised. maybe do via accumulator, don't use iter_token_brace?
* - need remove the qualifier as they make the sequence pattern matching
* more difficult?
*)

(* Annotates, by side effect, the [where] field of the tokens of [groups].
 *
 * Every token first gets [where] reset to [TV.InTopLevel]. The groups are
 * then walked from left to right and, each time one of the shapes below is
 * recognized, a context is pushed in front of [where] for every token that
 * TV.iter_token_multi visits in the matching Braces or Parens group.
 * The walk then continues from that group, so nested groups are
 * analyzed too.
 *
 * Raises UnclosedSymbol when a class/struct header followed by a colon
 * makes Common2.split_when raise Not_found (presumably because no Braces
 * group follows the colon).
 *)
let set_context_tag_multi groups =
  (* pushes [ctx] on the context stack of every token of [group] *)
  let push_context ctx group =
    TV.iter_token_multi
      (fun tok -> tok.TV.where <- ctx :: tok.TV.where)
      [group]
  in

  let rec walk = function
    | [] -> ()

    (* struct Foo {, also valid for class and union *)
    | Tok {t = (Tstruct _ | Tunion _ | Tclass _); _}
      :: Tok {t = TIdent (s, _); _}
      :: (Braces _ as braces) :: rest ->
        push_context (TV.InClassStruct s) braces;
        walk (braces :: rest)

    (* anonymous struct or union *)
    | Tok {t = (Tstruct _ | Tunion _); _} :: (Braces _ as braces) :: rest ->
        push_context (TV.InClassStruct "__anon__") braces;
        walk (braces :: rest)

    (* = { } *)
    | Tok {t = TEq _; _} :: (Braces _ as braces) :: rest ->
        push_context InInitializer braces;
        walk (braces :: rest)

    (* enum xxx { InEnum *)
    | Tok {t = Tenum _; _} :: Tok {t = TIdent _; _}
      :: (Braces _ as braces) :: rest
    | Tok {t = Tenum _; _} :: (Braces _ as braces) :: rest ->
        push_context TV.InEnum braces;
        walk (braces :: rest)

    (* C++: class Foo : ... { *)
    | Tok {t = (Tclass _ | Tstruct _); _}
      :: Tok {t = TIdent (s, _); _}
      :: Tok {t = TCol ii; _} :: rest ->
        let is_braces = function
          | Braces _ -> true
          | _ -> false
        in
        let (before, braces, after) =
          try Common2.split_when is_braces rest
          with Not_found ->
            raise (UnclosedSymbol (spf "PB with split_when at %s"
                                     (Parse_info.string_of_info ii)))
        in
        walk before;
        push_context (TV.InClassStruct s) braces;
        walk [braces];
        walk after

    (* need to look what was before to help the look_like_xxx heuristics
     *
     * The order of the 3 rules below is important. We must first try
     * look_like_argument which has less FP than look_like_parameter
     *)
    | x :: (Parens (_, body, _) as parens) :: rest
      when look_like_argument x body ->
        (*msg_context t1.t (TV.InArgument); *)
        push_context TV.InArgument parens;
        (* todo? recurse on body? *)
        walk [x];
        walk (parens :: rest)

    (* C++: special cases *)
    | (Tok {t = Toperator _; _} as tok1) :: tok2
      :: (Parens (_, body, _) as parens) :: rest
      when look_like_parameter tok1 body ->
        (* msg_context t1.t (TV.InParameter); *)
        push_context TV.InParameter parens;
        (* recurse on body? hmm if InParameter should not have nested
         * stuff except when pass function pointer
         *)
        walk [tok1; tok2];
        walk (parens :: rest)

    | x :: (Parens (_, body, _) as parens) :: rest
      when look_like_parameter x body ->
        (* msg_context t1.t (TV.InParameter); *)
        push_context TV.InParameter parens;
        (* recurse on body? hmm if InParameter should not have nested
         * stuff except when pass function pointer
         *)
        walk [x];
        walk (parens :: rest)

    (* void xx() *)
    | Tok {t = typ; _} :: Tok {t = TIdent _; _}
      :: (Parens _ as parens) :: rest
      when TH.is_basic_type typ ->
        (* msg_context t1.t (TV.InParameter); *)
        push_context TV.InParameter parens;
        walk (parens :: rest)

    (* no rule applies: descend into the group, if any, then move on *)
    | x :: rest ->
        (match x with
         | Tok _ -> ()
         | Parens (_, inner, _)
         | Braces (_, inner, _)
         | Angle (_, inner, _) ->
             walk inner);
        walk rest
  in
  (* sane initialization *)
  TV.iter_token_multi (fun tok -> tok.TV.where <- [TV.InTopLevel]) groups;
  walk groups

(*****************************************************************************)
(* Main heuristics C++ *)
(*****************************************************************************)

(*
* assumes a view without:
* - template arguments, qualifiers,
* - comments and cpp directives
* - TODO public/protected/... ?
*)

(* C++ entry point: currently the very same function as
 * set_context_tag_multi, kept under a separate name.
 *)
let set_context_tag_cplus = set_context_tag_multi