package commons

  1. Overview
  2. Docs
Yet another set of common utilities

Install

dune-project
 Dependency

Authors

Maintainers

Sources

commons_1.8.0.tar.gz
md5=00142d2d5f299c86ee44f19820bf9874
sha512=d25a57c434514ecb9adc5a129eeec9feca1cea2d1383e3bde74b52a05da174a09e0f46e7407f7b86ecdf1bf6faf7e0a66ef744d6fb389cb5f8398bc32e349555

doc/src/commons/SPcre.ml.html

Source file SPcre.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
(*
   Shared settings for using the Pcre module (pcre-ocaml library).
*)

open Printf

let logger = Logging.get_logger [ __MODULE__ ]

(*
   Flag required for the following to succeed:
     Pcre.regexp ~flags:[`UTF8] "\\x{200E}"
*)
let extra_flag = `UTF8

(*
   'limit' and 'limit_recursion' are set explicitly to make semgrep
   fail consistently across platforms (e.g. CI vs. local Mac).
   The default compile-time defaults are 10_000_000 for both
   'limit' and 'limit_recursion' but they can be overridden during
   the installation of the pcre library. We protect ourselves
   from such custom installs.
*)
let regexp ?study ?iflags ?(flags = []) ?chtables pat =
  (* pcre doesn't mind if a flag is duplicated so we just append extra flags *)
  let flags = extra_flag :: flags in
  (* OCaml's Pcre library does not support setting timeouts, and since it's just
   * a wrapper for a C library `Common.set_timeout` doesn't work... So, we set a
   * lower `limit` and `limit_recursion` (default values are 10_000_000) to avoid
   * spending too much time on regex matching. See perf/input/semgrep_targets.txt
   * and perf/input/semgrep_targets.yaml for an example where Semgrep appeared to
   * hang (but it was just the Pcre engine taking way too much time). *)
  Pcre.regexp ?study ~limit:1_000_000 (* sets PCRE_EXTRA_MATCH_LIMIT *)
    ~limit_recursion:1_000_000 (* sets PCRE_EXTRA_MATCH_LIMIT_RECURSION *)
    ?iflags ~flags ?chtables pat

let pmatch ?iflags ?flags ?rex ?pos ?callout subj =
  try Ok (Pcre.pmatch ?iflags ?flags ?rex ?pos ?callout subj) with
  | Pcre.Error err -> Error err

let exec ?iflags ?flags ?rex ?pos ?callout subj =
  try Ok (Some (Pcre.exec ?iflags ?flags ?rex ?pos ?callout subj)) with
  | Not_found -> Ok None
  | Pcre.Error err -> Error err

let exec_all ?iflags ?flags ?rex ?pos ?callout subj =
  try Ok (Pcre.exec_all ?iflags ?flags ?rex ?pos ?callout subj) with
  | Not_found -> Ok [||]
  | Pcre.Error err -> Error err

let exec_to_strings ?iflags ?flags ?rex ?pos ?callout subj =
  match exec_all ?iflags ?flags ?rex ?pos ?callout subj with
  | Ok a -> Ok (Array.map Pcre.get_substrings a)
  | Error _ as e -> e

let split ?iflags ?flags ?rex ?pos ?max ?callout subj =
  try Ok (Pcre.split ?iflags ?flags ?rex ?pos ?max ?callout subj) with
  | Pcre.Error err -> Error err

let string_of_error (error : Pcre.error) =
  match error with
  | Partial -> "Partial"
  | BadPartial -> "BadPartial"
  | BadPattern (msg, pos) -> sprintf "Pcre.BadPattern(%S, pos=%i)" msg pos
  | BadUTF8 -> "BadUTF8"
  | BadUTF8Offset -> "BadUTF8Offset"
  | MatchLimit -> "MatchLimit"
  | RecursionLimit -> "RecursionLimit"
  | WorkspaceSize -> "WorkspaceSize"
  | InternalError msg -> sprintf "InternalError(%S)" msg

let log_error subj err =
  let string_fragment =
    let len = String.length subj in
    if len < 200 then subj
    else sprintf "%s ... (%i bytes)" (Str.first_chars subj 200) len
  in
  logger#error "PCRE error: %s on input %S" (string_of_error err)
    string_fragment

let pmatch_noerr ?iflags ?flags ?rex ?pos ?callout ?(on_error = false) subj =
  match pmatch ?iflags ?flags ?rex ?pos ?callout subj with
  | Ok res -> res
  | Error err ->
      log_error subj err;
      on_error

let exec_noerr ?iflags ?flags ?rex ?pos ?callout subj =
  match exec ?iflags ?flags ?rex ?pos ?callout subj with
  | Ok res -> res
  | Error err ->
      log_error subj err;
      None

let exec_all_noerr ?iflags ?flags ?rex ?pos ?callout subj =
  match exec_all ?iflags ?flags ?rex ?pos ?callout subj with
  | Ok res -> res
  | Error err ->
      log_error subj err;
      [||]

let split_noerr ?iflags ?flags ?rex ?pos ?max ?callout ~on_error subj =
  match split ?iflags ?flags ?rex ?pos ?max ?callout subj with
  | Ok res -> res
  | Error err ->
      log_error subj err;
      on_error

let string_of_exn (e : exn) =
  match e with
  | Pcre.Error error -> Some (sprintf "Pcre.Error(%s)" (string_of_error error))
  | Pcre.Backtrack -> Some "Pcre.Backtrack"
  | Pcre.Regexp_or (pat, error) ->
      Some (sprintf "Pcre.Regexp_or(pat=%S, %s)" pat (string_of_error error))
  | _not_from_pcre -> None

(*
   You can test this with:

     $ dune utop
     # SPcre.register_exception_printer ();;
     # Pcre.pmatch ~pat:"(a+)+$" "aaaaaaaaaaaaaaaaaaaaaaaaaa!"
         |> assert false
       with e -> Printexc.to_string e;;
     - : string = "Pcre.Error(MatchLimit)"
*)
let register_exception_printer () = Printexc.register_printer string_of_exn
OCaml

Innovation. Community. Security.