package ez_search

  1. Overview
  2. Docs

Source file main.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
(**************************************************************************)
(*                                                                        *)
(*  Copyright (c) 2021 OCamlPro SAS                                       *)
(*                                                                        *)
(*  All rights reserved.                                                  *)
(*  This file is distributed under the terms of the GNU Lesser General    *)
(*  Public License version 2.1, with the special exception on linking     *)
(*  described in the LICENSE.md file in the root directory.               *)
(*                                                                        *)
(*                                                                        *)
(**************************************************************************)

open V1  (* from outside, should be: open Ez_search.V1 *)
open EzSearch.TYPES

open Ez_file.V1
open EzFile.OP

(** [find_term ~db ~is_case_sensitive ~is_regexp ~lines ~maxn ~verbose
    ~engine ~ncores term] searches [term] in [db] and prints at most
    [maxn] occurrences on stdout, each with [lines] lines of context.

    [engine] selects the matcher handed to [EzSearch.search_and_count]:
    - ["default"] / ["chambart"]: no custom matcher is passed;
    - ["memmem"]: [EzSearch.memmem] (plain, case-sensitive strings only);
    - ["str"]: the [Str] module;
    - ["re"]: the [ReStr] module.

    When [verbose] is set, the number of occurrences is reported on stderr
    and the search is run through [EzSearch.time].

    @raise Failure if [engine] is ["memmem"] and either [is_regexp] is set
    or [is_case_sensitive] is unset.

    Any other engine name prints an error on stderr and exits with
    status 2. *)
let find_term ~db ~is_case_sensitive ~is_regexp
    ~lines ~maxn ~verbose ~engine ~ncores term =

  (* Optional custom matcher, forwarded as [?find] to
     [EzSearch.search_and_count]. *)
  let find =
    match engine with
    | "default" | "chambart" ->
        None
    | "memmem" ->
        if is_regexp then
          failwith "No regexp with memmem";
        if not is_case_sensitive then
          failwith "Only case sensitive with memmem";
        Some (fun ~pos ~len haystack ->
            EzSearch.memmem ~haystack ~pos ~len ~needle:term)
                  (*
          | "pcre" -> (* does not work correctly, why ? *)
              let rex, pat = match is_regexp, is_case_sensitive with
                | true, true -> Some ( Pcre.regexp term ), None
                | true, false -> Some ( Pcre.regexp term), None
                | false, true -> None, Some term
                | false, false -> None, Some term
              in
              fun ~pos ~len s ->
                let t = Pcre.pcre_exec ~len ?rex ?pat ~pos s in
                if Array.length t = 0 then exit 13;
                if Array.length t <> 1 then
                  Printf.printf "len=%d\n%!" ( Array.length t );
                t.(0)
*)
    | "str" ->
        let regexp = match is_regexp, is_case_sensitive with
          | true, true -> Str.regexp term
          | true, false -> Str.regexp_case_fold term
          | false, true -> Str.regexp_string term
          | false, false -> Str.regexp_string_case_fold term
        in
        Some (fun ~pos ~len s ->
            Str.search_forward ~len regexp s pos)
    | "re" ->
        let regexp = match is_regexp, is_case_sensitive with
          | true, true -> ReStr.regexp term
          | true, false -> ReStr.regexp_case_fold term
          | false, true -> ReStr.regexp_string term
          | false, false -> ReStr.regexp_string_case_fold term
        in
        (* NOTE(review): unlike the "str" engine, the length passed here is
           [len - pos]; presumably the two [search_forward] functions
           interpret [~len] differently -- confirm. *)
        Some (fun ~pos ~len s ->
            ReStr.search_forward ~len:(len-pos) regexp s pos)
    | _ ->
        (* List only the engines that are actually accepted above. *)
        Printf.eprintf
          "Error: unknown engine %S (should be: default|chambart|memmem|str|re)\n%!"
          engine;
        exit 2
  in

  (* Print one occurrence: an "entry:filename" header, the preceding
     context lines, the matching line with its position, then the
     following context lines. *)
  let print_occ pos =
    let occ = EzSearch.occurrence_file ~db pos in
    let file = occ.occ_file in

    Printf.printf "%s:%s\n%!" file.file_entry file.file_name ;
    let line = EzSearch.occurrence_line ~db occ in
    let c = EzSearch.occurrence_context ~db ~line occ ~max:lines in
    let print_context ( line, s ) =
      Printf.printf "%4d  %s\n%!" line s
    in
    List.iter print_context c.prev_lines ;
    Printf.printf "%4d--%s (position: %d)\n%!" line
      c.curr_line c.curr_pos;
    List.iter print_context c.next_lines
  in

  let f () =
    let total, total_occs =
      EzSearch.search_and_count ~db ~is_regexp ~is_case_sensitive
        ~ncores ~maxn ?find term
    in
    if verbose then
      Printf.eprintf "Found %d occurrences\n%!" total;
    (* Only the first [maxn] occurrences of the returned list are shown. *)
    List.iteri (fun i occ ->
        if i < maxn then
          print_occ occ
      ) total_occs
  in
  if verbose then
    EzSearch.time "Search" f ()
  else f ()

(* Home directory of the current user, read from the HOME environment
   variable; falls back to "/root" when HOME is not set.  Only [Not_found]
   (the exception [Sys.getenv] raises for an unbound variable) is handled,
   so unrelated exceptions are no longer silently swallowed. *)
let home_dir =
  try Sys.getenv "HOME" with
  | Not_found -> "/root"

(* Default database directory: ".opam/ocp-search" under [home_dir],
   assembled with the [//] operator from [EzFile.OP]. *)
let db_dir_default =
  let opam_root = home_dir // ".opam" in
  opam_root // "ocp-search"

(** Command-line entry point of ocp-search.

    Parses the command line, then performs, in this order, every action
    that was requested:
    - [--index DIR]: index a directory into the database;
    - [--count]: print the size of the database on stderr;
    - [--string TERM] / [--regexp TERM]: search through [find_term];
    - [--file GLOB]: list the files matching the glob, or dump the file
      into the current directory when exactly one matches.

    The usage message is printed when no action was requested.  An
    unexpected anonymous argument prints an error and exits with
    status 2. *)
let main () =

  (* Command-line state, with its default values. *)
  let to_index = ref None in
  let db_dir = ref db_dir_default in
  let sources = ref true in
  let search = ref None in
  let is_regexp = ref false in
  let is_case_sensitive = ref true in
  let count_lines = ref false in
  let use_mapfile = ref true in
  let lines = ref 1 in
  let n = ref 10 in
  let content = ref None in
  let verbose = ref true in
  let engine = ref "str" in
  let ncores = ref max_int in

  let arg_list = Arg.align  [

      "--index", Arg.String (fun dir -> to_index := Some dir),
      "DIR Index directory. The directory should contain a list of directories, each one containing the sources of the package (the directory name will be used as an 'entry', and all filenames will be relative to this directory inside the package).";

      "--count", Arg.Set count_lines,
      " Print number of lines in database";

      "-i", Arg.Clear is_case_sensitive,
      " Search in case insensitive way";

      "--string", Arg.String (fun term ->
          is_regexp := false ;
          search := Some term),
      "TERM Term to search";

      "--regexp", Arg.String (fun term ->
          is_regexp := true ;
          search := Some term),
      "REGEXP Regular expression to search";

      "--no-mmap", Arg.Clear use_mapfile,
      " Do not map file in memory";

      "--lines", Arg.Int ( (:=) lines ),
      "NLINES Number of lines of context to print";

      (* Keep this list in sync with the engines accepted by [find_term]. *)
      "--engine", Arg.String ( (:=) engine),
      "ENGINE Choose search engine (default|chambart|memmem|str|re)";

      "-n", Arg.Int ( (:=) n ),
      "NBR Maximal number of occurrences";

      "--file", Arg.String (fun s -> content := Some s),
      "ENTRY:FILENAME Dump content of filename";

      "-q", Arg.Clear verbose,
      " Do not display debug info";

      "--build", Arg.Clear sources,
      " Index/search build files (dune and Makefile). By default, ocp-search scans and searches source files.";

      "--ncores", Arg.Int ( (:=) ncores ),
      "NCORES Set number of cores to use (auto)";

      "--db-dir", Arg.String (fun s -> db_dir := s),
      "DIR Directory where the database should be created/found. ~/.opam/ocp-search by default";
    ]

  in
  let arg_usage = "ocp-search [ARGS]: index and search sources" in
  Arg.parse arg_list
    (fun arg ->
       Printf.eprintf "Error: unexpected argument %S\n%!" arg;
       Arg.usage arg_list arg_usage ;
       exit 2)
    arg_usage;

  (* Set as soon as one action ran; used to print the usage otherwise. *)
  let work_done = ref false in

  let db_dir = !db_dir in
  let use_mapfile = !use_mapfile in
  (* A relative database directory is resolved against the current
     working directory. *)
  let pwd = Sys.getcwd () in
  let db_dir =
    if Filename.is_relative db_dir then
      pwd // db_dir
    else
      db_dir
  in

  (* Source files and build files live in two distinct databases. *)
  let db_name = if !sources then "sources" else "build" in

  (* Action: --index *)
  begin
    match !to_index with
    | None -> ()
    | Some dir ->
        work_done := true;
        EzFile.make_dir ~p:true db_dir;
        (* File filter: OCaml sources by extension, or (with --build)
           files whose name before the last '.' is "dune" or "makefile",
           compared case-insensitively. *)
        let select path =
          let basename = Filename.basename path in
          let basename, ext = EzString.rcut_at basename '.' in
          if !sources then
            ( match ext with
              | "ml" | "mll" | "mly" | "mli" -> true
              | _ -> false )
          else
            ( match String.lowercase_ascii basename with
              | "dune"
              | "makefile" -> true
              | _ -> false )
        in
        EzSearch.index_directory dir ~db_dir ~db_name ~select
  end;

  (* The database is loaded on first use only, then cached, so that
     several actions in the same run share a single load. *)
  let db =
    let db = ref None in
    fun () ->
      match !db with
      | None ->
          work_done := true ;
          let x =
            let f () =
              EzSearch.load_db ~db_dir ~db_name ~use_mapfile ()
            in
            if !verbose then
              EzSearch.time "Load index" f ()
            else
              f ()
          in
          db := Some x;
          x
      | Some db -> db
  in

  (* Action: --count *)
  if !count_lines then  begin
    let db = db() in
    Printf.eprintf "Length: %d chars\n" ( EzSearch.length ~db);
    let n = EzSearch.count_lines_total ~db in
    Printf.eprintf "Indexed: %d lines\n%!" n;
  end ;

  (* Action: --string / --regexp *)
  begin
    match !search with
    | None -> ()
    | Some term ->
        let is_regexp = !is_regexp in
        let is_case_sensitive = !is_case_sensitive in
        let db = db() in
        find_term ~db ~is_regexp ~is_case_sensitive
          ~lines:!lines ~verbose:!verbose
          ~maxn:!n term ~engine:!engine ~ncores:!ncores
  end;

  (* Action: --file.  The argument is a glob matched against
     "entry:filename" for every file of the database. *)
  begin
    match !content with
    | None -> ()
    | Some content ->
        let db = db () in
        let files = EzSearch.files ~db in
        let re = Re.Glob.glob ~anchored:true ~pathname:false content in
        let re = Re.compile re in
        (* Matching files, accumulated in reverse database order. *)
        let results =
          Array.fold_left (fun acc file ->
              let s = Printf.sprintf "%s:%s" file.file_entry file.file_name in
              if Re.execp re s then file :: acc else acc
            ) [] files
        in
        begin
          match results with
          | [ file ] ->
              (* Exactly one match: save its content under its basename
                 in the current directory. *)
              let content = EzSearch.file_content ~db file in
              let basename = Filename.basename file.file_name in
              Printf.printf "%s:%s\n%!" file.file_entry file.file_name;
              EzFile.write_file basename content ;
              Printf.eprintf "Content saved to %S\n%!" basename
          | results ->
              (* Zero or several matches: only list them. *)
              Printf.eprintf "%d files found\n%!" ( List.length results );
              List.iter (fun file ->
                  Printf.printf "%s:%s\n%!" file.file_entry file.file_name
                ) ( List.rev results )
        end
  end;

  if not !work_done then
    Arg.usage arg_list arg_usage;

  ()