(* Package morbig -- source file quoteRemoval.ml *)
(**************************************************************************)
(*  Copyright (C) 2017-2023 Yann Régis-Gianas, Nicolas Jeannerod,         *)
(*  Ralf Treinen.                                                         *)
(*                                                                        *)
(*  This is free software: you can redistribute it and/or modify it       *)
(*  under the terms of the GNU General Public License, version 3.         *)
(*                                                                        *)
(*  Additional terms apply, due to the reproduction of portions of        *)
(*  the POSIX standard. Please refer to the file COPYING for details.     *)
(**************************************************************************)

(*specification:

   2.6.7 Quote Removal

   The quote characters ( <backslash>, single-quote, and double-quote)
   that were present in the original word shall be removed unless they
   have themselves been quoted.

*)

(** [on_string s] yields a copy of string [s], with all quotes removed
    as described in the specification.

    The string is scanned once, from left to right:
    - outside double quotes, a single quote is dropped and everything up
      to the next single quote is copied verbatim; the closing single
      quote is dropped too (if it is missing, the rest of [s] is copied);
    - inside double quotes, a single quote is an ordinary character and
      is copied;
    - a double quote that is neither escaped nor inside single quotes is
      dropped and toggles the double-quoted context;
    - a backslash is dropped when it is followed by a double quote, a
      backquote, a dollar sign or another backslash, and the character
      after it is copied literally. Outside double quotes the same holds
      for a backslash followed by a single quote;
    - any other backslash, including one at the very end of [s], is
      copied.

    NOTE(review): a backslash in front of an ordinary character is kept
    even outside double quotes. This is the behaviour the function
    already had and it is preserved here on purpose; confirm against the
    callers before tightening it. *)
let on_string s =
  let n = String.length s in
  let b = Buffer.create n in
  let i = ref 0 in
  (* true while we are between an opening double quote and its match *)
  let in_double_quotes = ref false in
  let keep () = Buffer.add_char b s.[!i]; incr i in
  let skip () = incr i in
  (* Does the backslash at position [!i] quote the character after it? *)
  let backslash_quotes_next () =
    !i + 1 < n
    && (match s.[!i + 1] with
        | '"' | '`' | '$' | '\\' -> true
        (* an escaped single quote must not open a quoted section; inside
           double quotes the backslash stays and the quote is literal *)
        | '\'' -> not !in_double_quotes
        | _ -> false)
  in
  while !i < n do
    match s.[!i] with
    | '\'' ->
       if !in_double_quotes then
         (* quoted by the surrounding double quotes: ordinary character *)
         keep ()
       else begin
         (* skip the initial single quote *)
         skip ();
         (* copy everything up to the next single quote *)
         while !i < n && s.[!i] <> '\'' do keep () done;
         (* skip the final single quote, if there is one *)
         if !i < n then skip ()
       end
    | '"' ->
       (* escaped double quotes are consumed by the backslash case and
          double quotes inside single quotes by the inner loop above, so
          this one really delimits a double-quoted section *)
       in_double_quotes := not !in_double_quotes;
       skip ()
    | '\\' ->
       (* drop the backslash only when it quotes the next character *)
       if backslash_quotes_next () then skip ();
       (* [!i < n] still holds here: either nothing was skipped, or the
          quoted character exists. This copies the quoted character, or
          the backslash itself when it quotes nothing. *)
       keep ()
    | _ -> keep ()
  done;
  Buffer.contents b

(** States of the scanner in [backslash_as_in_doublequotes]: [Default]
    while reading ordinary text, [Backslash] right after a backslash
    whose fate depends on the character that follows it. *)
type backslash_automaton_state = Default | Backslash

(** [backslash_as_in_doublequotes s] yields a copy of [s] in which
    backslashes are interpreted the way they are inside double quotes:
    a backslash followed by a dollar sign, a backquote, a double quote
    or another backslash is removed and the following character is kept;
    a backslash followed by any other character is kept together with
    that character. A backslash that is the last character of [s] is
    kept as well.

    NOTE(review): a backslash followed by a newline is kept, not joined
    as a line continuation; presumably that is handled before this
    function is called -- confirm against the callers. *)
let backslash_as_in_doublequotes s =
  let n = String.length s in
  let b = Buffer.create n in
  let state = ref Default in
  String.iter
    (fun c ->
      match !state with
      | Default ->
         (* hold back a backslash until we have seen what follows it *)
         if c = '\\' then state := Backslash else Buffer.add_char b c
      | Backslash ->
         (match c with
          (* the characters a backslash can quote inside double quotes *)
          | '$' | '`' | '"' | '\\' -> ()
          (* otherwise the backslash is an ordinary character *)
          | _ -> Buffer.add_char b '\\');
         Buffer.add_char b c;
         state := Default)
    s;
  (* Reached when [s] ends with a backslash that quotes nothing:
     emit the backslash that was held back. *)
  if !state = Backslash then Buffer.add_char b '\\';
  Buffer.contents b

(** [remove_tabs_at_linestart s] yields a copy of [s] in which every run
    of tab characters found at the beginning of a line has been deleted.
    Tabs elsewhere in a line are left alone. The regular expression is
    built once, when this definition is evaluated, and reused by every
    call. *)
let remove_tabs_at_linestart =
  Str.global_replace (Str.regexp "^\t+") ""