Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file database_light_ml.ml
(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleFlag=Flag_parsingmoduleDb=Database_codemoduleHC=Highlight_codemodulePI=Parse_infomoduleT=Parser_ml(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* Obsolete file: see the -db_of_graph_code option of codegraph and
* graph_code_cmt.ml for a more complete implementation.
*
* Light database building for OCaml code (mainly used by the codemap
* semantic code visualizer). We currently abuse the code highlighter
* to extract the entity definitions, and for the uses we are mainly using
* the list of tokens ... not the AST.
*
* We build the full database in multiple steps as some
* operations need the information computed globally by the
* previous step:
*
* - collect all definitions and their files
* - collect all uses, updating the count number of the
* corresponding entity (if it's used in a different file)
* as well as the entity->test_files_using_it hash.
*
* Currently many analyses are just lexical-based (yes I know, I am
* ridiculous) so there is some ambiguity when we find a use such
* as a function call. We don't always know to which precise entity
* it corresponds. Being precise would require resolving module names.
* Fortunately in my code I don't use 'open' that much and only use the
* simple alias-module idiom, which makes it tractable in practice to
* identify the entity a qualified function call refers to.
*
*)

(*****************************************************************************)
(* Types *)
(*****************************************************************************)

(* poor man's id for now. It's quite close to the fullid we have in
* database_php.ml.
*)
type entity_poor_id = Id of (Common.filename * Common2.filepos)

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* [is_pleac_file file] is true when the lowercased path [file] mentions
 * "pleac".
 *
 * The pattern used to be ".*pleac*": the trailing star is a regexp
 * repetition of 'c', not a glob wildcard, so the final 'c' was optional and
 * any path merely containing "plea" was accepted as well.
 *
 * NOTE(review): assumes Common.(=~) matches the regexp from the start of
 * the string, the way Str.string_match does -- confirm.
 *)
let is_pleac_file file =
  let file = String.lowercase_ascii file in
  file =~ ".*pleac"

(* todo? quite pad specific ...
* try detect when use OUnit ?
*)letis_test_filefile=letfile=String.lowercase_asciifilein(file=~".*/test_"||file=~".*/unit_")letis_test_or_pleac_filefile=is_test_filefile||is_pleac_filefileletentity_poor_id_of_entitye=Id(e.Db.e_file,e.Db.e_pos)(* give a score per id and then sort and return top k *)letrank_and_filter_examples_of_use~rootidsentities_arr=ids+>List.map(funid->letfile=entities_arr.(id).Db.e_fileinletfile=Filename.concatrootfileinletsize=Common2.filesizefilein(* Low means better; so prefer small size and pleac files *)letscore=size/(ifis_pleac_filefilethen4else1)inscore,id)+>Common.sort_by_key_lowfirst+>List.mapsndletparsefile=Common.save_excursionFlag.error_recoverytrue(fun()->Common.save_excursionFlag.show_parsing_errorfalse(fun()->Parse_ml.parsefile))(*****************************************************************************)(* Main entry point *)(*****************************************************************************)letcompute_database?(verbose=false)files_or_dirs=letroot=Common2.common_prefix_of_files_or_dirsfiles_or_dirsinletroot=Common2.chop_dirsymbolrootinifverbosethenpr2(spf"generating ML db_light with root = %s"root);letfiles=Lib_parsing_ml.find_source_files_of_dir_or_filesfiles_or_dirsinletdirs=files+>List.mapFilename.dirname+>Common2.uniq_effin(* PHASE 1: collecting definitions *)ifverbosethenpr2(spf"PHASE 1: collecting definitions");let(hdefs:(string,Db.entity)Hashtbl.t)=Hashtbl.create1001in(* This is used later when one wants to get the first id of a file.
*
* This is just because in step2 when we are collecting uses we
* don't know in which entity we currently are but we know in
* which file we are and for the good_examples_of_use we really
* just need to give one of the id in the (supposidely small) test_or_pleac
* file.
*
* todo: once we have a real callgraph we will not need this anymore.
*)let(hfile_to_entities:(filename,entity_poor_id)Hashtbl.t)=Hashtbl.create1001infiles+>Console.progress~show:verbose(funk->List.iter(funfile->k();let((ast,toks),_stat)=parsefileinletast=matchastwith(* in database light we do error recovery *)|None->[]|Somexs->xsin(* this is quite similar to what we do in tags_ml.ml *)letprefs=Highlight_code.default_highlighter_preferencesinHighlight_ml.visit_program~lexer_based_tagger:true(* !! *)~tag_hook:(funinfocateg->(* todo: use is_entity_def_category ? *)matchcategwith|HC.Entity(_,HC.Def2_)|HC.FunctionDecl_->lets=PI.str_of_infoinfoinletl=PI.line_of_infoinfoinletc=PI.col_of_infoinfoinletfile=Parse_info.file_of_infoinfo+>Common.readable~rootinletmodule_name=Module_ml.module_name_of_filenamefileinletfullpath=Parse_info.file_of_infoinfoin(* stuff in mli is ok only where there is no .ml, like
* for the externals/core/ stuff
*)let(d,b,e)=Common2.dbe_of_filenamefullpathinife="ml"||(e="mli"&¬(Sys.file_exists(Common2.filename_of_dbe(d,b,"ml"))))thenbeginletentity={Database_code.e_name=s;e_fullname=spf"%s.%s"module_names;e_file=file;e_pos={Common2.l=l;c};e_kind=Common2.some(Db.entity_kind_of_highlight_category_defcateg);(* filled in step 2 *)e_number_external_users=0;e_good_examples_of_use=[];(* TODO once we have a real parser, can at least
* set the UseGlobal property.
*)e_properties=[];}in(* todo? could be more precise and add the Modulename.s
* in the hash so that we don't need to call
* Hashtbl.find_all but just Hashtbl.find later ?
*)Hashtbl.addhdefssentity;Hashtbl.addhfile_to_entitiesfile(entity_poor_id_of_entityentity);end;|_->())prefs(ast,toks)));(* PHASE 2: collecting uses *)ifverbosethenpr2(spf"PHASE 2: collecting uses");letentities_arr=Common.hash_to_listhdefs+>List.mapsnd+>Array.of_listin(* this is useful when we want to add cross-references in the entities
* such as the good_examples_of_use that reference another Db.entity_id.
*)let(h_id_mldb_to_id_db:(entity_poor_id,Db.entity_id)Hashtbl.t)=Hashtbl.create1001inentities_arr+>Array.iteri(funid_dbe->letid_mldb=entity_poor_id_of_entityeinHashtbl.addh_id_mldb_to_id_dbid_mldbid_db;);(* todo: could rank later.
* so would need a first phase where we collect with
* let (hentity_to_test_files_using_it:
* (entity_poor_id, Common.filename) Hashtbl.t) =
* Hashtbl.create 101 in
* ?
*
* For now the granularity of the goto_example is entity ->
* test_files_using_it instead of test_functions_that_use_it
* because we don't have the full callgraph and different
* entities id as in database_php.ml. We could try to identify
* in which entity a function call is by reusing the highlight/visitor
* above and tracking the tokens and what was the last entity
* encountered.
*)letadd_good_example_of_usetest_fileentity=letpoor_id_opt=Common2.hfind_optiontest_filehfile_to_entitiesin(matchpoor_id_optwith|None->pr2(spf"WEIRD, could not find an entity in %s"test_file)|Somepoor_id_user->letid_user=Hashtbl.findh_id_mldb_to_id_dbpoor_id_userin(* could do a take_safe 3 but for ocaml I don't think we have
* any scaling issues
*)entity.Db.e_good_examples_of_use<-(id_user::entity.Db.e_good_examples_of_use);)infiles+>Console.progress~show:verbose(funk->List.iter(funfile->k();iffile=~".*external/"&&(* I don't really want pleac files to participate in the
* e_number_external_users statistics but I want pleac files
* to participate in the e_good_examples_of_use so have
* to special case it here. Could introduce a step3 phase ...
*)not(file=~".*pleac/")thenpr2(spf"skipping external file: %s"file)elsebeginlet((_ast,toks),_stat)=parsefileinletfile=Common.readable~rootfilein(* try to resolve function use more precisely instead of incrementing
* all entities that have xxx as a name. Look if the module name
* match the basename of the file defining the entity.
* But have to remember the module X = XXX aliases.
*)lethmodule_aliases=Hashtbl.create11inlettoks=toks+>Common.exclude(function|T.TCommentSpace_->true|_->false)in(* Only consider Module.xxx. Otherwise names such as 'x', or 'yylex'
* which are variables or internal functions are considered
* as having a huge count.
*
*)letrecaux_tokstoks=matchtokswith|T.Tmodule_::T.TUpperIdent(s,_ii)::T.TEq_::T.TUpperIdent(s2,_ii2)::xs->(* we want to transform every occurence of s into s2,
* to remove the alias sugar
*)Hashtbl.addhmodule_aliasesss2;aux_toksxs|T.TUpperIdent(s,_ii)::T.TDot_ii2::T.TLowerIdent(s2,_ii3)::xs->Hashtbl.find_allhdefss2+>List.iter(funentity->letfile_entity=entity.Db.e_fileinletfinal_module_name=ifHashtbl.memhmodule_aliasessthenHashtbl.findhmodule_aliasesselsesinletmodule_entity=let(_d,b,_e)=Common2.dbe_of_filenamefile_entityinString.capitalize_asciibiniffile_entity<>file&&final_module_name=module_entitythenbeginentity.Db.e_number_external_users<-entity.Db.e_number_external_users+1;ifis_test_or_pleac_filefilethenadd_good_example_of_usefileentity;end);aux_toksxs|[]->()|_x::xs->aux_toksxsinaux_tokstoks;end));(* PHASE 3: adjusting entities *)ifverbosethenpr2(spf"PHASE 3: adjusting entities");entities_arr+>Array.iter(fune->letids=e.Db.e_good_examples_of_useine.Db.e_good_examples_of_use<-rank_and_filter_examples_of_use~rootidsentities_arr;);letdirs=dirs+>List.map(funs->Common.readable~roots)inletdirs=Db.alldirs_and_parent_dirs_of_relative_dirsdirsin{Db.root=root;dirs=dirs+>List.map(fund->d,0);(* TODO *)files=files+>List.map(funf->Common.readable~rootf,0);(* TODO *)entities=entities_arr;}