package pfff

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file tags_file.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
(* Yoann Padioleau
 *
 * Copyright (C) 2010-2012 Facebook
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation, with the
 * special exception on linking described in file license.txt.
 * 
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 * license.txt for more details.
 *)
open Common

module PI = Parse_info
module E = Entity_code

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(*
 * Generating TAGS file (for emacs or vim)
 *
 * Supposed syntax for emacs TAGS (.tags) files, as analysed from output
 * of etags, read in etags.c and discussed with Francesco Potorti.
 * src: otags readme:
 * 
 * <file> ::= <page>+
 * <page> ::= <header><body>
 * <header> ::= <NP><CR><file-name>,<body-length><CR>
 * <body> ::= <tag-line>*
 * <tag-line> ::= <prefix><DEL><tag><SOH><line-number>,<begin-char-index><CR>
 * pad: when tag is already at the beginning of the line:
 * <tag-line> ::=<tag><DEL><line-number>,<begin-char-index><CR>
 * 
 * <NP> ::= ascii NP, (emacs ^L) 
 * <DEL> ::= ascii DEL, (emacs ^?)
 * <SOH> ::= ascii SOH, (emacs ^A)
 * <CR> :: ascii CR
 * 
 * See also http://en.wikipedia.org/wiki/Ctags#Tags_file_formats
 *)

(*****************************************************************************)
(* Types *)
(*****************************************************************************)

(* see http://en.wikipedia.org/wiki/Ctags#Tags_file_formats *)
let header = "\x0c\n"

let footer = ""

type tag = {
  tag_definition_text: string;
  tagname: string;
  line_number: int;
  (* offset of beginning of tag_definition_text, when have 0-indexed filepos *)
  byte_offset: int;
  (* only used by vim *)
  kind: Entity_code.entity_kind;
}

let mk_tag s1 s2 i1 i2 k = {
  tag_definition_text = s1;
  tagname = s2;
  line_number = i1;
  byte_offset = i2;
  kind = k;
}

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

let string_of_tag t = 
  spf "%s\x7f%s\x01%d,%d\n" 
    t.tag_definition_text
    t.tagname
    t.line_number
    t.byte_offset

(* of tests/misc/functions.php *)
(*
let fake_defs = [
  mk_tag "function a() {" "a" 3 7;
  mk_tag "function b() {" "b" 7 32;
  mk_tag "function c() {" "c" 14 65;
  mk_tag "function d() {" "d" 20 107;
]
*)

(* helpers used externally by language taggers *)
let tag_of_info filelines info kind =
  let line = PI.line_of_info info in
  let pos = PI.pos_of_info info in
  let col = PI.col_of_info info in
  let s = PI.str_of_info info in
  mk_tag (filelines.(line)) s line (pos - col) kind

(* C-s for "kind" in http://ctags.sourceforge.net/FORMAT *)
let vim_tag_kind_str tag_kind =
  match tag_kind with
  | E.Class -> "c"
  | E.Constant -> "d"
  | E.Function -> "f"
  | E.Method -> "f"
  | E.Type -> "t"
  | E.Field -> "m"

  | E.Module | E.Package
  | E.Global | E.Macro
  | E.TopStmts
  | E.Other _
  | E.ClassConstant
  | E.Constructor

  | E.File | E.Dir | E.MultiDirs
  | E.Exception
  | E.Prototype | E.GlobalExtern
      -> ""

(* vim uses '/' as a marker for the tag definition text, so if this
 * test contains '/' they must be escaped.
 *)
let vim_escape_slash str =
  Str.global_replace (Str.regexp "/") "\\/" str


(* For methods, in addition to the tag for the precise 'class::method'
 * name, it can be convenient to generate another tag with just the
 * 'method' name so people can quickly jump to some code with just the
 * method name. Of course if there is also a function somewhere using the
 * same name then this function could be hard to reach so we generate
 * an (imprecise) method tag only when there is no ambiguity. 
 *)
let add_method_tags_when_unambiguous files_and_defs =

  (* step1: global analysis on all defs, remember all names and methods *)
  let h_toplevel_names = 
    files_and_defs +> List.map (fun (_file, tags) ->
      tags +> Common.map_filter (fun t ->
        match t.kind with
        | E.Class | E.Function | E.Constant -> Some t.tagname
        | _ -> None
      )
    ) +> List.flatten +> Common.hashset_of_list
  in
  let h_grouped_methods =
    files_and_defs +> List.map (fun (_file, tags) ->
      tags +> Common.map_filter (fun t ->
        match t.kind with
        | E.Method ->
            if t.tagname =~ ".*::\\(.*\\)"
            then Some (Common.matched1 t.tagname, t)
            else failwith ("method tag should contain '::[, got: " ^ t.tagname)
        | _ -> None
      )
    (* could skip the group_assoc_bykey and do Hashtbl.find_all below instead *)
    ) +> List.flatten +> Common.group_assoc_bykey_eff +> Common.hash_of_list
  in
  (* step2: add method tag when no ambiguity *)
  files_and_defs +> List.map (fun (file, tags) ->
    file,
    tags +> List.map (fun t ->
      match t.kind with
      | E.Method ->
          if t.tagname =~ ".*::\\(.*\\)"
          then
            let methodname = Common.matched1 t.tagname in
            if not (Hashtbl.mem h_toplevel_names methodname) &&
               List.length (Hashtbl.find h_grouped_methods methodname) = 1
            then [t; { t with tagname = methodname }]
            else [t]
          else failwith("method tag should contain '::[, got: " ^ t.tagname)
      | _ -> [t]
    ) +> List.flatten
  )

(*****************************************************************************)
(* Main entry point *)
(*****************************************************************************)

let threshold_long_line = 1000

let generate_TAGS_file tags_file files_and_defs =
  Common.with_open_outfile tags_file (fun (pr_no_nl, _chan) ->
    pr_no_nl header;
    files_and_defs +> List.iter (fun (file, defs) ->
      let all_defs = defs +> Common.map_filter (fun tag ->
        if String.length tag.tag_definition_text > threshold_long_line
        then begin 
          pr2_once (spf "WEIRD long string in %s, passing the tag" file);
          None
        end 
        else Some (string_of_tag tag)
      ) +> Common.join "" in
      let size_defs = String.length all_defs in
      pr_no_nl (spf "%s,%d\n" file size_defs);
      pr_no_nl all_defs;
      pr_no_nl "\x0c\n";
    );
  );
  ()

(* http://vimdoc.sourceforge.net/htmldoc/tagsrch.html#tags-file-format *)
let generate_vi_tags_file tags_file files_and_defs =
  Common.with_open_outfile tags_file (fun (pr_no_nl, _chan) ->

    let all_tags =
      files_and_defs +> List.map (fun (file, defs) ->
        defs +> Common.map_filter (fun tag ->
          if String.length tag.tag_definition_text > 300
          then begin 
            pr2 (spf "WEIRD long string in %s, passing the tag" file);
            None
          end 
          else Some (tag.tagname, (tag, file))
        )) 
      +> List.flatten
      +> Common.sort_by_key_lowfirst
    in
    all_tags +> List.iter (fun (_tagname, (tag, file)) ->
      (* {tagname}<Tab>{tagfile}<Tab>{tagaddress} 
       * "The two characters semicolon and double quote [...] are
       * interpreted by Vi as the start of a comment, which makes the
       * following be ignored."
       *)
      pr_no_nl (spf "%s\t%s\t/%s/;\"\t%s\n"
                   tag.tagname
                   file
                   (vim_escape_slash tag.tag_definition_text)
                   (vim_tag_kind_str tag.kind)
      );
    );
  )