package regenerate

You can search for identifiers within the package.

in-package search v0.2.0

package regenerate

regenerate
- CHANGES
- LICENSE
- Library regenerate
  - Regenerate
    
    Regex
    
    Word
    
    S
    
    List
    
    String
    
    S
    
    Find
    
    Split
    
    Sub
    
    Segments
    
    SIGMA
    
    Make
- Library regenerate.segments
  - Segments
    
    OrderedMonoid
    
    S
    
    ThunkList
    
    ThunkListMemo
    
    LazyList
    
    StrictSet
    
    Trie
    
    WORD
    
    Make
    
    String
- Sources
  - regenerate
    
    langgen.ml
    
    parsing.ml
    
    regenerate.ml
    
    regenerate__.ml
    
    regex.ml
    
    word.ml
  - regenerate.segments
    
    Heap.ml
    
    LazyList.ml
    
    Segments.ml
    
    Sigs.ml
    
    StrictSet.ml
    
    ThunkList.ml
    
    ThunkListMemo.ml
    
    Trie.ml
    
    segments__.ml

Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Module `Word.String`Source

Sourcetype nonrec char = char

include module type of struct include CCString end

Basic String Utils

Sourcetype 'a sequence = ('a -> unit) -> unit

deprecated
use 'a iter instead

Sourcetype 'a iter = ('a -> unit) -> unit

Fast internal iterator.

since 2.8

Sourcetype 'a gen = unit -> 'a option

Sourcetype 'a klist = unit -> [ `Nil | `Cons of 'a * 'a klist ]

Common Signature

Sourcemodule type S = CCString.S

Strings

include module type of struct include String end

Strings

Sourcetype t = string

The type for strings.

Sourceval make : int -> char -> string

make n c is a string of length n with each index holding the character c.

raises Invalid_argument
if n < 0 or n > Sys.max_string_length.

Sourceval of_bytes : bytes -> string

Return a new string that contains the same bytes as the given byte sequence.

since 4.13.0

Sourceval to_bytes : string -> bytes

Return a new byte sequence that contains the same bytes as the given string.

since 4.13.0

Sourceval get : string -> int -> char

get s i is the character at index i in s. This is the same as writing s.[i].

raises Invalid_argument
if i is not an index of s.

Concatenating

Note. The Stdlib.(^) binary operator concatenates two strings.

Sourceval concat : string -> string list -> string

concat sep ss concatenates the list of strings ss, inserting the separator string sep between each.

raises Invalid_argument
if the result is longer than Sys.max_string_length bytes.

Sourceval cat : string -> string -> string

cat s1 s2 concatenates s1 and s2 (s1 ^ s2).

raises Invalid_argument
if the result is longer than Sys.max_string_length bytes.

since 4.13.0

Predicates and comparisons

Sourceval starts_with : prefix:string -> string -> bool

starts_with ~prefix s is true if and only if s starts with prefix.

since 4.13.0

Sourceval ends_with : suffix:string -> string -> bool

ends_with ~suffix s is true if and only if s ends with suffix.

since 4.13.0

Sourceval contains_from : string -> int -> char -> bool

contains_from s start c is true if and only if c appears in s after position start.

raises Invalid_argument
if start is not a valid position in s.

Sourceval rcontains_from : string -> int -> char -> bool

rcontains_from s stop c is true if and only if c appears in s before position stop+1.

raises Invalid_argument
if stop < 0 or stop+1 is not a valid position in s.

Sourceval contains : string -> char -> bool

contains s c is String.contains_from s 0 c.

Extracting substrings

Sourceval sub : string -> int -> int -> string

sub s pos len is a string of length len, containing the substring of s that starts at position pos and has length len.

raises Invalid_argument
if pos and len do not designate a valid substring of s.

Transforming

Sourceval fold_left : ('a -> char -> 'a) -> 'a -> string -> 'a

fold_left f x s computes f (... (f (f x s.[0]) s.[1]) ...) s.[n-1], where n is the length of the string s.

since 4.13.0

Sourceval fold_right : (char -> 'a -> 'a) -> string -> 'a -> 'a

fold_right f s x computes f s.[0] (f s.[1] ( ... (f s.[n-1] x) ...)), where n is the length of the string s.

since 4.13.0

Sourceval trim : string -> string

trim s is s without leading and trailing whitespace. Whitespace characters are: ' ', '\x0C' (form feed), '\n', '\r', and '\t'.

since 4.00.0

Sourceval escaped : string -> string

escaped s is s with special characters represented by escape sequences, following the lexical conventions of OCaml.

All characters outside the US-ASCII printable range [0x20;0x7E] are escaped, as well as backslash (0x5C) and double-quote (0x22).

The function Scanf.unescaped is a left inverse of escaped, i.e. Scanf.unescaped (escaped s) = s for any string s (unless escaped s fails).

raises Invalid_argument
if the result is longer than Sys.max_string_length bytes.

Traversing

Searching

Sourceval index_from : string -> int -> char -> int

index_from s i c is the index of the first occurrence of c in s after position i.

raises Not_found
if c does not occur in s after position i.

raises Invalid_argument
if i is not a valid position in s.

Sourceval index_from_opt : string -> int -> char -> int option

index_from_opt s i c is the index of the first occurrence of c in s after position i (if any).

raises Invalid_argument
if i is not a valid position in s.

since 4.05

Sourceval rindex_from : string -> int -> char -> int

rindex_from s i c is the index of the last occurrence of c in s before position i+1.

raises Not_found
if c does not occur in s before position i+1.

raises Invalid_argument
if i+1 is not a valid position in s.

Sourceval rindex_from_opt : string -> int -> char -> int option

rindex_from_opt s i c is the index of the last occurrence of c in s before position i+1 (if any).

raises Invalid_argument
if i+1 is not a valid position in s.

since 4.05

Sourceval index : string -> char -> int

index s c is String.index_from s 0 c.

Sourceval index_opt : string -> char -> int option

index_opt s c is String.index_from_opt s 0 c.

since 4.05

Sourceval rindex : string -> char -> int

rindex s c is String.rindex_from s (length s - 1) c.

Sourceval rindex_opt : string -> char -> int option

rindex_opt s c is String.rindex_from_opt s (length s - 1) c.

since 4.05

Strings and Sequences

Sourceval to_seqi : t -> (int * char) Seq.t

to_seqi s is like to_seq but also tuples the corresponding index.

since 4.07

UTF decoding and validations

since 4.14

UTF-8

Sourceval get_utf_8_uchar : t -> int -> Uchar.utf_decode

get_utf_8_uchar b i decodes a UTF-8 character at index i in b.

Sourceval is_valid_utf_8 : t -> bool

is_valid_utf_8 b is true if and only if b contains valid UTF-8 data.

UTF-16BE

Sourceval get_utf_16be_uchar : t -> int -> Uchar.utf_decode

get_utf_16be_uchar b i decodes a UTF-16BE character at index i in b.

Sourceval is_valid_utf_16be : t -> bool

is_valid_utf_16be b is true if and only if b contains valid UTF-16BE data.

UTF-16LE

Sourceval get_utf_16le_uchar : t -> int -> Uchar.utf_decode

get_utf_16le_uchar b i decodes a UTF-16LE character at index i in b.

Sourceval is_valid_utf_16le : t -> bool

is_valid_utf_16le b is true if and only if b contains valid UTF-16LE data.

Deprecated functions

Sourceval create : int -> bytes

create n returns a fresh byte sequence of length n. The sequence is uninitialized and contains arbitrary bytes.

raises Invalid_argument
if n < 0 or n > Sys.max_string_length.

deprecated
This is a deprecated alias of Bytes.create/BytesLabels.create.

Sourceval copy : string -> string

Return a copy of the given string.

deprecated
Because strings are immutable, it doesn't make much sense to make identical copies of them.

Sourceval fill : bytes -> int -> int -> char -> unit

fill s pos len c modifies byte sequence s in place, replacing len bytes by c, starting at pos.

raises Invalid_argument
if pos and len do not designate a valid substring of s.

deprecated
This is a deprecated alias of Bytes.fill/BytesLabels.fill.

Sourceval uppercase : string -> string

Return a copy of the argument, with all lowercase letters translated to uppercase, including accented letters of the ISO Latin-1 (8859-1) character set.

deprecated
Functions operating on Latin-1 character set are deprecated.

Sourceval lowercase : string -> string

Return a copy of the argument, with all uppercase letters translated to lowercase, including accented letters of the ISO Latin-1 (8859-1) character set.

deprecated
Functions operating on Latin-1 character set are deprecated.

Sourceval capitalize : string -> string

Return a copy of the argument, with the first character set to uppercase, using the ISO Latin-1 (8859-1) character set.

deprecated
Functions operating on Latin-1 character set are deprecated.

Sourceval uncapitalize : string -> string

Return a copy of the argument, with the first character set to lowercase, using the ISO Latin-1 (8859-1) character set.

deprecated
Functions operating on Latin-1 character set are deprecated.

Binary decoding of integers

The functions in this section binary decode integers from strings.

All following functions raise Invalid_argument if the characters needed at index i to decode the integer are not available.

Little-endian (resp. big-endian) encoding means that least (resp. most) significant bytes are stored first. Big-endian is also known as network byte order. Native-endian encoding is either little-endian or big-endian depending on Sys.big_endian.

32-bit and 64-bit integers are represented by the int32 and int64 types, which can be interpreted either as signed or unsigned numbers.

8-bit and 16-bit integers are represented by the int type, which has more bits than the binary encoding. These extra bits are sign-extended (or zero-extended) for functions which decode 8-bit or 16-bit integers and represent them with int values.

Sourceval get_uint8 : string -> int -> int

get_uint8 b i is b's unsigned 8-bit integer starting at character index i.

since 4.13.0

Sourceval get_int8 : string -> int -> int

get_int8 b i is b's signed 8-bit integer starting at character index i.

since 4.13.0

Sourceval get_uint16_ne : string -> int -> int

get_uint16_ne b i is b's native-endian unsigned 16-bit integer starting at character index i.

since 4.13.0

Sourceval get_uint16_be : string -> int -> int

get_uint16_be b i is b's big-endian unsigned 16-bit integer starting at character index i.

since 4.13.0

Sourceval get_uint16_le : string -> int -> int

get_uint16_le b i is b's little-endian unsigned 16-bit integer starting at character index i.

since 4.13.0

Sourceval get_int16_ne : string -> int -> int

get_int16_ne b i is b's native-endian signed 16-bit integer starting at character index i.

since 4.13.0

Sourceval get_int16_be : string -> int -> int

get_int16_be b i is b's big-endian signed 16-bit integer starting at character index i.

since 4.13.0

Sourceval get_int16_le : string -> int -> int

get_int16_le b i is b's little-endian signed 16-bit integer starting at character index i.

since 4.13.0

Sourceval get_int32_ne : string -> int -> int32

get_int32_ne b i is b's native-endian 32-bit integer starting at character index i.

since 4.13.0

Sourceval get_int32_be : string -> int -> int32

get_int32_be b i is b's big-endian 32-bit integer starting at character index i.

since 4.13.0

Sourceval get_int32_le : string -> int -> int32

get_int32_le b i is b's little-endian 32-bit integer starting at character index i.

since 4.13.0

Sourceval get_int64_ne : string -> int -> int64

get_int64_ne b i is b's native-endian 64-bit integer starting at character index i.

since 4.13.0

Sourceval get_int64_be : string -> int -> int64

get_int64_be b i is b's big-endian 64-bit integer starting at character index i.

since 4.13.0

Sourceval get_int64_le : string -> int -> int64

get_int64_le b i is b's little-endian 64-bit integer starting at character index i.

since 4.13.0

Sourceval equal : string -> string -> bool

Equality function on strings.

Sourceval compare : string -> string -> int

Sourceval is_empty : string -> bool

is_empty s returns true iff s is empty (i.e. its length is 0).

since 1.5

Sourceval hash : string -> int

Sourceval init : int -> (int -> char) -> string

Like Array.init.

since 0.3.3

Sourceval rev : string -> string

rev s returns the reverse of s.

since 0.17

Sourceval pad : ?side:[ `Left | `Right ] -> ?c:char -> int -> string -> string

pad n str ensures that str is at least n bytes long, and pads it on the side with c if it's not the case.

parameter side
determines where padding occurs (default: `Left).

parameter c
the char used to pad (default: ' ').

since 0.17

Sourceval of_char : char -> string

of_char 'a' is "a".

since 0.19

Sourceval of_gen : char gen -> string

Convert a gen of characters to a string.

Sourceval of_iter : char iter -> string

Convert an iter of characters to a string.

since 2.8

Sourceval of_std_seq : char Seq.t -> string

Convert a sequence of characters to a string.

since 2.8

Sourceval of_seq : char sequence -> string

deprecated use of_iter

Sourceval of_klist : char klist -> string

deprecated use of_std_seq

Sourceval of_list : char list -> string

Convert a list of characters to a string.

Sourceval of_array : char array -> string

Convert an array of characters to a string.

Sourceval to_array : string -> char array

Return the array of characters contained in the string.

Sourceval find : ?start:int -> sub:string -> string -> int

Find sub in string, returns its first index or -1.

Sourceval find_all : ?start:int -> sub:string -> string -> int gen

find_all ~sub s finds all occurrences of sub in s, even overlapping instances.

parameter start
starting position in s.

since 0.17

Sourceval find_all_l : ?start:int -> sub:string -> string -> int list

find_all_l ~sub s finds all occurrences of sub in s and returns them in a list.

parameter start
starting position in s.

since 0.17

Sourceval mem : ?start:int -> sub:string -> string -> bool

mem ~sub s is true iff sub is a substring of s.

since 0.12

Sourceval rfind : sub:string -> string -> int

Find sub in string from the right, returns its first index or -1. Should only be used with very small sub.

since 0.12

Source

val replace : 
  ?which:[ `Left | `Right | `All ] ->
  sub:string ->
  by:string ->
  string ->
  string

replace ~sub ~by s replaces some occurrences of sub by by in s.

parameter which
decides whether the occurrences to replace are:
- `Left first occurrence from the left (beginning).
- `Right first occurrence from the right (end).
- `All all occurrences (default).

raises Invalid_argument
if sub = "".

since 0.14

Sourceval is_sub : sub:string -> int -> string -> int -> sub_len:int -> bool

is_sub ~sub i s j ~sub_len returns true iff the substring of sub starting at position i and of length sub_len is a substring of s starting at position j.

Sourceval repeat : string -> int -> string

The same string, repeated n times.

Sourceval prefix : pre:string -> string -> bool

prefix ~pre s returns true iff pre is a prefix of s.

Sourceval suffix : suf:string -> string -> bool

suffix ~suf s returns true iff suf is a suffix of s.

since 0.7

Sourceval chop_prefix : pre:string -> string -> string option

chop_prefix ~pre s removes pre from s if pre really is a prefix of s, returns None otherwise.

since 0.17

Sourceval chop_suffix : suf:string -> string -> string option

chop_suffix ~suf s removes suf from s if suf really is a suffix of s, returns None otherwise.

since 0.17

Sourceval take : int -> string -> string

take n s keeps only the n first chars of s.

since 0.17

Sourceval drop : int -> string -> string

drop n s removes the n first chars of s.

since 0.17

Sourceval take_drop : int -> string -> string * string

take_drop n s = take n s, drop n s.

since 0.17

Sourceval lines : string -> string list

lines s returns a list of the lines of s (splits along '\n').

since 0.10

Sourceval lines_gen : string -> string gen

lines_gen s returns a generator of the lines of s (splits along '\n').

since 0.10

Sourceval concat_gen : sep:string -> string gen -> string

concat_gen ~sep g concatenates all strings of g, separated with sep.

since 0.10

Sourceval unlines : string list -> string

unlines l concatenates all strings of l, separated with '\n'.

since 0.10

Sourceval unlines_gen : string gen -> string

unlines_gen g concatenates all strings of g, separated with '\n'.

since 0.10

Sourceval set : string -> int -> char -> string

set s i c creates a new string which is a copy of s, except for index i, which becomes c.

raises Invalid_argument
if i is an invalid index.

since 0.12

Sourceval iter : (char -> unit) -> string -> unit

Alias to String.iter.

since 0.12

Sourceval iteri : (int -> char -> unit) -> string -> unit

Iter on chars with their index.

since 0.12

Sourceval map : (char -> char) -> string -> string

Map chars.

since 0.12

Sourceval mapi : (int -> char -> char) -> string -> string

Map chars with their index.

since 0.12

Sourceval filter_map : (char -> char option) -> string -> string

filter_map f s calls (f a0) (f a1) ... (f an) where a0 ... an are the characters of s. It returns the string of characters ci such that f ai = Some ci (when f returns None, the corresponding element of s is discarded).

since 0.17

Sourceval filter : (char -> bool) -> string -> string

filter f s discards characters not satisfying f.

since 0.17

Sourceval flat_map : ?sep:string -> (char -> string) -> string -> string

Map each char to a string, then concatenate them all.

parameter sep
optional separator between each generated string.

since 0.12

Sourceval for_all : (char -> bool) -> string -> bool

True for all chars?

since 0.12

Sourceval exists : (char -> bool) -> string -> bool

True for some char?

since 0.12

include S with type t := string

Sourceval length : string -> int

Return the length (number of characters) of the given string.

Sourceval blit : string -> int -> Bytes.t -> int -> int -> unit

Like String.blit. Compatible with the -safe-string option.

raises Invalid_argument
if indices are not valid.

Sourceval fold : ('a -> char -> 'a) -> 'a -> string -> 'a

Fold on chars by increasing index.

since 0.7

Conversions

Sourceval to_gen : string -> char CCString.gen

Return the gen of characters contained in the string.

Sourceval to_iter : string -> char CCString.iter

Return the iter of characters contained in the string.

since 2.8

Sourceval to_std_seq : string -> char Seq.t

to_std_seq s returns a Seq.t of the bytes in s.

since 2.8

Sourceval to_klist : string -> char CCString.klist

deprecated use to_std_seq

Sourceval to_list : string -> char list

Return the list of characters contained in the string.

Sourceval pp_buf : Buffer.t -> string -> unit

Renamed from pp since 2.0.

Sourceval drop_while : (char -> bool) -> t -> t

drop_while f s discards any characters starting from the left, up to the first character c not satisfying f c.

since 2.2

Sourceval rdrop_while : (char -> bool) -> t -> t

rdrop_while f s discards any characters starting from the right, up to the first character c not satisfying f c.

since 2.2

Sourceval ltrim : t -> t

Trim space on the left (see String.trim for more details).

since 1.2

Sourceval rtrim : t -> t

Trim space on the right (see String.trim for more details).

since 1.2

Operations on 2 strings

Sourceval map2 : (char -> char -> char) -> string -> string -> string

Map pairs of chars.

raises Invalid_argument
if the strings do not have the same length.

since 0.12

Sourceval iter2 : (char -> char -> unit) -> string -> string -> unit

Iterate on pairs of chars.

raises Invalid_argument
if the strings do not have the same length.

since 0.12

Sourceval iteri2 : (int -> char -> char -> unit) -> string -> string -> unit

Iterate on pairs of chars with their index.

raises Invalid_argument
if the strings do not have the same length.

since 0.12

Sourceval fold2 : ('a -> char -> char -> 'a) -> 'a -> string -> string -> 'a

Fold on pairs of chars.

raises Invalid_argument
if the strings do not have the same length.

since 0.12

Sourceval for_all2 : (char -> char -> bool) -> string -> string -> bool

All pairs of chars respect the predicate?

raises Invalid_argument
if the strings do not have the same length.

since 0.12

Sourceval exists2 : (char -> char -> bool) -> string -> string -> bool

Exists a pair of chars?

raises Invalid_argument
if the strings do not have the same length.

since 0.12

Ascii functions

Those functions are deprecated in String since 4.03, so we provide a stable alias for them even in older versions.

Sourceval capitalize_ascii : string -> string

See String.

since 0.18

Sourceval uncapitalize_ascii : string -> string

See String.

since 0.18

Sourceval uppercase_ascii : string -> string

See String.

since 0.18

Sourceval lowercase_ascii : string -> string

See String.

since 0.18

Sourceval equal_caseless : string -> string -> bool

Equality test that ignores ASCII letter case.

since 1.2

Finding

A relatively efficient algorithm for finding sub-strings.

since 1.0

Sourcemodule Find = CCString.Find

Splitting

Sourcemodule Split = CCString.Split

Sourceval split_on_char : char -> string -> string list

Split the string along the given char.

since 1.2

Sourceval split : by:string -> string -> string list

Alias to Split.list_cpy.

since 1.2

Utils

Sourceval compare_versions : string -> string -> int

compare_versions a b compares version strings a and b, considering that numbers are above text.

since 0.13

Sourceval compare_natural : string -> string -> int

Natural Sort Order, comparing chunks of digits as natural numbers. https://en.wikipedia.org/wiki/Natural_sort_order

since 1.3

Sourceval edit_distance : string -> string -> int

Edit distance between two strings. This satisfies the classical distance axioms: it is always non-negative, symmetric, and satisfies the formula distance a b + distance b c >= distance a c.

Slices

A contiguous part of a string

Sourcemodule Sub = CCString.Sub

Sourceval empty : string

Sourceval singleton : char -> string

Sourceval cons : char -> string -> string

Sourceval append : string -> string -> string

Sourceval compare_char : Char.t -> Char.t -> int

Sourceval pp : Format.formatter -> string -> unit

Sourceval to_seq : string -> (char -> unit) -> unit

package regenerate

Module Word.StringSource

Basic String Utils

Common Signature

Strings

Strings

Concatenating

Predicates and comparisons

Extracting substrings

Transforming

Traversing

Searching

Strings and Sequences

UTF decoding and validations

UTF-8

UTF-16BE

UTF-16LE

Deprecated functions

Binary decoding of integers

Conversions

Operations on 2 strings

Ascii functions

Finding

Splitting

Utils

Slices

Module `Word.String`Source