s ^ s' is
module Char : sig ... end
Characters (bytes in fact).
First note that it is not a goal of
Astring to maintain compatibility with the OCaml String module. In Astring:
- Strings are assumed to be immutable.
- Deprecated functions are not included.
- Some rarely used functions are dropped, some signatures and names are altered, a few often needed functions are added.
- Scanning functions are not doubled for supporting forward and reverse directions. Both directions are supported via a single function and an optional rev argument.
- Functions do not raise
Not_found. They return option values instead.
- Functions escaping bytes to printable US-ASCII characters use capital hexadecimal escapes rather than decimal ones.
- US-ASCII string support is collected in the Char.Ascii and
String.Ascii submodules. The functions make sure to operate only on the US-ASCII code points (rather than ISO/IEC 8859-1 code points). This means they can safely be used on UTF-8 encoded strings; they will of course only deal with the US-ASCII subset U+0000 to U+007F of Unicode scalar values.
- The module has pre-applied exception safe String.Set and String.Map submodules.
Opening Astring at the top of a module that uses the OCaml standard library in a project that compiles with
-safe-string will either result in typing errors or compatible behaviour except for uses of the
String.trim function, see below.
If for some reason you can't compile your project with
-safe-string this may not be a problem. However you have to make sure that your code does not depend on fresh strings being returned by functions of the
String module. The functions of
Astring.String assume strings to be immutable and thus do not always allocate fresh strings for their results. This is the case for example for the
(^) operator redefinition: no string is allocated whenever one of its arguments is an empty string. That being said it is still better to first make your project compile with
-safe-string and then port to Astring.
The String.sub function is renamed to
String.with_range. If you are working with
String.find you may find it easier to use
String.with_index_range which takes indices as arguments and is thus directly usable with the result of
String.find. But in general index based string processing should be frowned upon and replaced by substring extraction combinators.
The standard OCaml
String.trim function only trims the characters ' ', '\t', '\n', '\012' and '\r'. In
Astring the default set adds vertical tab (
0x0B) to the set to match the behaviour of the C isspace(3) function.
If you want to preserve the behaviour of the original function you can replace any use of
String.trim with the following
(** [std_ocaml_trim s] trims [s] using the character set of the standard
    OCaml [String.trim]: space, tab, newline, form feed (['\012']) and
    carriage return. Vertical tab is deliberately not in the set. *)
let std_ocaml_trim s =
  let is_std_blank c =
    match c with
    | ' ' | '\t' | '\n' | '\012' | '\r' -> true
    | _ -> false
  in
  String.trim ~drop:is_std_blank s
We show how to use substrings to quickly devise LL(1) parsers. To keep it simple we do not implement precise error reporting, but note that it would be easy to add it by replacing the
raise Exit calls by an exception with more information: we have everything at hand at these call points to report good error messages.
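The first example parses version numbers structured as follows:
[v|V]major.minor[.patchlevel][(+|-)info]
A Str regular expression for this would be:
"[vV]?\\([0-9]+\\)\\.\\([0-9]+\\)\\(\\.\\([0-9]+\\)\\)?\\([+-]\\(.*\\)\\)?"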
Using substrings is certainly less terse but note that the parser is made of reusable sub-functions.
(** [parse_version s] parses [s] as a version number made of an optional
    leading ['v'] or ['V'], a major number, a ['.'], a minor number, an
    optional ['.'] followed by a patch number, and an optional ['+'] or ['-']
    followed by free-form information running to the end of the string.

    Returns [Some (major, minor, patch, info)] on success, where [patch] is
    [0] when absent and [info] is [None] when absent. Returns [None] on any
    syntax error, including numbers that [String.Sub.to_int] rejects. *)
let parse_version : string -> (int * int * int * string option) option =
fun s ->
  (* Every helper signals a syntax error by raising [Exit]; the exception is
     turned into [None] at the very end. *)
  let skip_v_prefix sub =
    match String.Sub.head sub with
    | None -> raise Exit (* empty input is not a version *)
    | Some ('v' | 'V') -> String.Sub.tail sub
    | Some _ -> sub
  in
  let expect_dot sub =
    match String.Sub.head sub with
    | Some '.' -> String.Sub.tail sub
    | None | Some _ -> raise Exit
  in
  (* Reads a non-empty run of US-ASCII digits and converts it to an [int]. *)
  let number sub =
    let digits, rest = String.Sub.span ~min:1 ~sat:Char.Ascii.is_digit sub in
    if String.Sub.is_empty digits then raise Exit
    else begin
      match String.Sub.to_int digits with
      | Some n -> n, rest
      | None -> raise Exit
    end
  in
  let parse () =
    let major, rest = number (skip_v_prefix (String.sub s)) in
    let minor, rest = number (expect_dot rest) in
    let patch, rest =
      match String.Sub.head rest with
      | Some '.' -> number (expect_dot rest)
      | Some _ | None -> 0, rest
    in
    let info =
      match String.Sub.head rest with
      | None -> None
      | Some ('+' | '-') -> Some (String.Sub.to_string (String.Sub.tail rest))
      | Some _ -> raise Exit
    in
    major, minor, patch, info
  in
  match parse () with
  | version -> Some version
  | exception Exit -> None
The second example parses space separated key-value bindings environments of the form:
key0 = value0 key1 = value1 ...
To support values with spaces, values can be quoted between two
'"' characters. If they are quoted then any
"\\\"" subsequence (0x5C, 0x22) is interpreted as the character '"' (
0x22) and any "\\\\" subsequence (0x5C, 0x5C) is interpreted as the character '\\' (
0x5C).
(** [parse_env s] parses [s] as a white space separated sequence of
    [key = value] bindings and returns them as a string map.

    - A key is a non-empty run of US-ASCII letters and ['_'].
    - White space around ['='] is optional.
    - An unquoted value runs up to the next US-ASCII white space character; a
      missing value at the end of input yields [""].
    - A quoted value is delimited by ['"']. Inside it, a backslash followed
      by ['"'] or by a backslash stands for that second character; any other
      escape, or a missing closing quote, is a syntax error.
    - When a key occurs several times the last binding wins, since each one
      is stored with [String.Map.add].

    Returns [Some map] on success and [None] on any syntax error. *)
let parse_env : string -> string String.map option =
fun s ->
  try
    (* Helpers signal syntax errors by raising [Exit], caught below. *)
    let skip_white s = String.Sub.drop ~sat:Char.Ascii.is_white s in
    let parse_key s =
      let id_char c = Char.Ascii.is_letter c || c = '_' in
      match String.Sub.span ~min:1 ~sat:id_char s with
      | (key, _) when String.Sub.is_empty key -> raise Exit
      | (key, rem) -> (String.Sub.to_string key), rem
    in
    let parse_eq s = match String.Sub.head s with
    | Some '=' -> String.Sub.tail s
    | Some _ | None -> raise Exit
    in
    let parse_value s = match String.Sub.head s with
    | Some '"' -> (* quoted *)
        let is_data = function '\\' | '"' -> false | _ -> true in
        (* [acc] collects, in reverse order, the literal chunks and the
           unescaped characters seen so far. *)
        let rec loop acc s =
          let data, rem = String.Sub.span ~sat:is_data s in
          match String.Sub.head rem with
          | Some '"' ->
              let acc = List.rev (data :: acc) in
              String.Sub.(to_string @@ concat acc), (String.Sub.tail rem)
          | Some '\\' ->
              let rem = String.Sub.tail rem in
              begin match String.Sub.head rem with
              | Some ('"' | '\\' as c) ->
                  let acc = String.(sub (of_char c)) :: data :: acc in
                  loop acc (String.Sub.tail rem)
              | Some _ | None -> raise Exit
              end
          | None | Some _ -> raise Exit
        in
        (* Skip the opening quote and start with an empty accumulator. *)
        loop [] (String.Sub.tail s)
    | Some _ ->
        let is_data c = not (Char.Ascii.is_white c) in
        let data, rem = String.Sub.span ~sat:is_data s in
        String.Sub.to_string data, rem
    | None -> "", s
    in
    let rec parse_bindings acc s =
      if String.Sub.is_empty s then acc else
      let key, s = parse_key s in
      let value, s =
        s |> skip_white |> parse_eq |> skip_white |> parse_value
      in
      parse_bindings (String.Map.add key value acc) (skip_white s)
    in
    Some (String.sub s |> skip_white |> parse_bindings String.Map.empty)
  with Exit -> None