package stdune

  1. Overview
  2. Docs
Legend:
Library
Module
Module type
Parameter
Class
Class type
type t = string
include module type of struct include StringLabels end with type t := t

Strings

val make : int -> char -> string

make n c is a string of length n with each index holding the character c.

val init : int -> f:(int -> char) -> string

init n ~f is a string of length n with index i holding the character f i (called in increasing index order).

  • since 4.02
val empty : string

The empty string.

  • since 4.13
val length : string -> int

length s is the length (number of bytes/characters) of s.

val get : string -> int -> char

get s i is the character at index i in s. This is the same as writing s.[i].

val of_bytes : bytes -> string

Return a new string that contains the same bytes as the given byte sequence.

  • since 4.13
val to_bytes : string -> bytes

Return a new byte sequence that contains the same bytes as the given string.

  • since 4.13
val blit : src:string -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int -> unit

Same as Bytes.blit_string which should be preferred.

Concatenating

Note. The Stdlib.(^) binary operator concatenates two strings.

val concat : sep:string -> string list -> string

concat ~sep ss concatenates the list of strings ss, inserting the separator string sep between each.

val cat : string -> string -> string

cat s1 s2 concatenates s1 and s2 (s1 ^ s2).

  • since 4.13

Predicates and comparisons

val starts_with : prefix:string -> string -> bool

starts_with ~prefix s is true if and only if s starts with prefix.

  • since 4.13
val ends_with : suffix:string -> string -> bool

ends_with ~suffix s is true if and only if s ends with suffix.

  • since 4.13
val contains_from : string -> int -> char -> bool

contains_from s start c is true if and only if c appears in s after position start.

val rcontains_from : string -> int -> char -> bool

rcontains_from s stop c is true if and only if c appears in s before position stop+1.

val contains : string -> char -> bool

contains s c is String.contains_from s 0 c.

Extracting substrings

val sub : string -> pos:int -> len:int -> string

sub s ~pos ~len is a string of length len, containing the substring of s that starts at position pos and has length len.

val split_on_char : sep:char -> string -> string list

split_on_char ~sep s is the list of all (possibly empty) substrings of s that are delimited by the character sep.

The function's result is specified by the following invariants:

  • The list is not empty.
  • Concatenating its elements using sep as a separator returns a string equal to the input (concat ~sep:(make 1 sep) (split_on_char ~sep s) = s).
  • No string in the result contains the sep character.
  • since 4.05

Transforming

val map : f:(char -> char) -> string -> string

map ~f s is the string resulting from applying f to all the characters of s in increasing order.

  • since 4.00
val mapi : f:(int -> char -> char) -> string -> string

mapi ~f s is like map but the index of the character is also passed to f.

  • since 4.02
val fold_left : f:('acc -> char -> 'acc) -> init:'acc -> string -> 'acc

fold_left ~f ~init:x s computes f (... (f (f x s.[0]) s.[1]) ...) s.[n-1], where n is the length of the string s.

  • since 4.13
val fold_right : f:(char -> 'acc -> 'acc) -> string -> init:'acc -> 'acc

fold_right ~f s ~init:x computes f s.[0] (f s.[1] ( ... (f s.[n-1] x) ...)), where n is the length of the string s.

  • since 4.13
val trim : string -> string

trim s is s without leading and trailing whitespace. Whitespace characters are: ' ', '\x0C' (form feed), '\n', '\r', and '\t'.

  • since 4.00
val escaped : string -> string

escaped s is s with special characters represented by escape sequences, following the lexical conventions of OCaml.

All characters outside the US-ASCII printable range [0x20;0x7E] are escaped, as well as backslash (0x5C) and double-quote (0x22).

The function Scanf.unescaped is a left inverse of escaped, i.e. Scanf.unescaped (escaped s) = s for any string s (unless escaped s fails).

val uppercase_ascii : string -> string

uppercase_ascii s is s with all lowercase letters translated to uppercase, using the US-ASCII character set.

  • since 4.05
val lowercase_ascii : string -> string

lowercase_ascii s is s with all uppercase letters translated to lowercase, using the US-ASCII character set.

  • since 4.05
val capitalize_ascii : string -> string

capitalize_ascii s is s with the first character set to uppercase, using the US-ASCII character set.

  • since 4.05
val uncapitalize_ascii : string -> string

uncapitalize_ascii s is s with the first character set to lowercase, using the US-ASCII character set.

  • since 4.05

Traversing

val iter : f:(char -> unit) -> string -> unit

iter ~f s applies function f in turn to all the characters of s. It is equivalent to f s.[0]; f s.[1]; ...; f s.[length s - 1]; ().

val iteri : f:(int -> char -> unit) -> string -> unit

iteri is like iter, but the function is also given the corresponding character index.

  • since 4.00

Searching

val index_from_opt : string -> int -> char -> int option

index_from_opt s i c is the index of the first occurrence of c in s after position i (if any).

  • since 4.05
val rindex_from_opt : string -> int -> char -> int option

rindex_from_opt s i c is the index of the last occurrence of c in s before position i+1 (if any).

  • since 4.05
val index_opt : string -> char -> int option

index_opt s c is String.index_from_opt s 0 c.

  • since 4.05
val rindex_opt : string -> char -> int option

rindex_opt s c is String.rindex_from_opt s (length s - 1) c.

  • since 4.05

Strings and Sequences

val to_seq : t -> char Seq.t

to_seq s is a sequence made of the string's characters in increasing order. In "unsafe-string" mode, modifications of the string during iteration will be reflected in the sequence.

  • since 4.07
val to_seqi : t -> (int * char) Seq.t

to_seqi s is like to_seq but also tuples the corresponding index.

  • since 4.07
val of_seq : char Seq.t -> t

of_seq s is a string made of the sequence's characters.

  • since 4.07

UTF decoding and validations

  • since 4.14

UTF-8

val get_utf_8_uchar : t -> int -> Uchar.utf_decode

get_utf_8_uchar b i decodes a UTF-8 character at index i in b.

val is_valid_utf_8 : t -> bool

is_valid_utf_8 b is true if and only if b contains valid UTF-8 data.

UTF-16BE

val get_utf_16be_uchar : t -> int -> Uchar.utf_decode

get_utf_16be_uchar b i decodes a UTF-16BE character at index i in b.

val is_valid_utf_16be : t -> bool

is_valid_utf_16be b is true if and only if b contains valid UTF-16BE data.

UTF-16LE

val get_utf_16le_uchar : t -> int -> Uchar.utf_decode

get_utf_16le_uchar b i decodes a UTF-16LE character at index i in b.

val is_valid_utf_16le : t -> bool

is_valid_utf_16le b is true if and only if b contains valid UTF-16LE data.

Binary decoding of integers

The functions in this section binary decode integers from strings.

All following functions raise Invalid_argument if the characters needed at index i to decode the integer are not available.

Little-endian (resp. big-endian) encoding means that least (resp. most) significant bytes are stored first. Big-endian is also known as network byte order. Native-endian encoding is either little-endian or big-endian depending on Sys.big_endian.

32-bit and 64-bit integers are represented by the int32 and int64 types, which can be interpreted either as signed or unsigned numbers.

8-bit and 16-bit integers are represented by the int type, which has more bits than the binary encoding. These extra bits are sign-extended (or zero-extended) for functions which decode 8-bit or 16-bit integers and represent them with int values.

val get_uint8 : string -> int -> int

get_uint8 b i is b's unsigned 8-bit integer starting at character index i.

  • since 4.13
val get_int8 : string -> int -> int

get_int8 b i is b's signed 8-bit integer starting at character index i.

  • since 4.13
val get_uint16_ne : string -> int -> int

get_uint16_ne b i is b's native-endian unsigned 16-bit integer starting at character index i.

  • since 4.13
val get_uint16_be : string -> int -> int

get_uint16_be b i is b's big-endian unsigned 16-bit integer starting at character index i.

  • since 4.13
val get_uint16_le : string -> int -> int

get_uint16_le b i is b's little-endian unsigned 16-bit integer starting at character index i.

  • since 4.13
val get_int16_ne : string -> int -> int

get_int16_ne b i is b's native-endian signed 16-bit integer starting at character index i.

  • since 4.13
val get_int16_be : string -> int -> int

get_int16_be b i is b's big-endian signed 16-bit integer starting at character index i.

  • since 4.13
val get_int16_le : string -> int -> int

get_int16_le b i is b's little-endian signed 16-bit integer starting at character index i.

  • since 4.13
val get_int32_ne : string -> int -> int32

get_int32_ne b i is b's native-endian 32-bit integer starting at character index i.

  • since 4.13
val seeded_hash : int -> t -> int

A seeded hash function for strings, with the same output value as Hashtbl.seeded_hash. This function allows this module to be passed as argument to the functor Hashtbl.MakeSeeded.

  • since 5.0
val get_int32_be : string -> int -> int32

get_int32_be b i is b's big-endian 32-bit integer starting at character index i.

  • since 4.13
val get_int32_le : string -> int -> int32

get_int32_le b i is b's little-endian 32-bit integer starting at character index i.

  • since 4.13
val get_int64_ne : string -> int -> int64

get_int64_ne b i is b's native-endian 64-bit integer starting at character index i.

  • since 4.13
val get_int64_be : string -> int -> int64

get_int64_be b i is b's big-endian 64-bit integer starting at character index i.

  • since 4.13
val get_int64_le : string -> int -> int64

get_int64_le b i is b's little-endian 64-bit integer starting at character index i.

  • since 4.13
val equal : t -> t -> bool
val compare : t -> t -> Ordering.t
val hash : t -> int
val to_dyn : t -> Dyn.t
val break : t -> pos:int -> t * t
val is_empty : t -> bool
val of_list : char list -> t
val is_prefix : t -> prefix:t -> bool
val is_suffix : t -> suffix:t -> bool
val take : t -> int -> t
val drop : t -> int -> t
val split_n : t -> int -> t * t
val drop_prefix : t -> prefix:t -> t option
val drop_prefix_if_exists : t -> prefix:t -> t
val drop_suffix : t -> suffix:t -> t option
val drop_suffix_if_exists : t -> suffix:t -> t
val drop_prefix_and_suffix : t -> prefix:t -> suffix:t -> t option

drop_prefix_and_suffix t ~prefix ~suffix attempts to remove prefix from the start of t and suffix from the end of t. It returns Some _ only if both the prefix and the suffix were present.

module Caseless : sig ... end

Case-insensitive matching semantics.

val capitalize : t -> t

These only change ASCII characters

val uncapitalize : t -> t
val uppercase : t -> t
val lowercase : t -> t
val index : t -> char -> int option
val index_from : t -> int -> char -> int option
val rindex : t -> char -> int option
val rindex_from : t -> int -> char -> int option
val extract_words : t -> is_word_char:(char -> bool) -> t list
val extract_comma_space_separated_words : t -> t list
val extract_blank_separated_words : t -> t list
val lsplit2 : t -> on:char -> (t * t) option
val lsplit2_exn : t -> on:char -> t * t
val rsplit2 : t -> on:char -> (t * t) option
val split : t -> on:char -> t list

split t ~on returns the list of non-overlapping substrings of t between each occurrence of on. If t begins or ends with on then an empty string will be present on the "far" side of on in the output. If on does not appear in t then the result is a list containing t (even if t is the empty string).

Note that split "" ~on returns [""] (ie. a list containing a single empty string).

This function is roughly the inverse of concat. Ie. concat ~sep:(String.make 1 c) (split ~on:c s) will return the original string s.

val split_lines : t -> t list
val escape_only : char -> t -> t

Escape ONLY one character. escaped also escapes '\n', etc., and transforms all chars above '~' into '\xxx', which is not suitable for UTF-8 strings.

val longest : string list -> int

Return the length of the longest string in the list

val longest_map : 'a list -> f:('a -> string) -> int
val longest_prefix : t list -> t
val exists : t -> f:(char -> bool) -> bool
val for_all : t -> f:(char -> bool) -> bool
val maybe_quoted : t -> t

maybe_quoted s is s if s doesn't need escaping according to OCaml lexing conventions and sprintf "%S" s otherwise.

(* CR-someday aalekseyev: this function is not great: barely anything "needs escaping according to OCaml lexing conventions", so the condition for whether to add the quote characters ends up being quite arbitrary. *)

val quoted : t -> t
val enumerate_and : string list -> string

Produces: "x, y and z"

val enumerate_or : string list -> string

Produces: "x, y or z"

val enumerate_one_of : t list -> t

Produces: "One of x, y or z"

val findi : string -> f:(char -> bool) -> int option

Find index of first character satisfying f

val rfindi : string -> f:(char -> bool) -> int option

Find index of last character satisfying f

include Comparable_intf.S with type key := t
module Map : sig ... end
module Set : sig ... end
module Table : Hashtbl.S with type key = t
val need_quoting : string -> bool

Whether the string needs quoting if it is part of a shell command

val quote_for_shell : string -> string

quote_for_shell s quotes s using Filename.quote if need_quoting s is true

val quote_list_for_shell : string list -> string

quote_list_for_shell l is List.map l ~f:quote_for_shell |> concat ~sep:" "

val filter_map : string -> f:(char -> char option) -> string
val contains_double_underscore : string -> bool
OCaml

Innovation. Community. Security.