package parseff

  1. Overview
  2. Docs
Direct-style parser combinator library for OCaml 5 powered by algebraic effects

Install

dune-project
 Dependency

Authors

Maintainers

Sources

parseff-0.1.0.tbz
sha256=097c71a38b39ab5925518e16c0efdf3b77a6b3b2185c82f168e0f1f4cb0772bf
sha512=811fbd770148bf3004ffc764dc08fa1a3ded9b4613f5749a6d2841c1af868de7afff4dd6b808b38254d28592433e23613573707159dfa171687839c520e93bb3

doc/repetition.html

Repetition and separation

These combinators handle patterns that repeat: lists, separated values, delimited blocks, and operator chains.

Basic repetition

many

Parseff.many applies a parser zero or more times. Returns a list of results. Always succeeds (returns [] if the parser fails immediately).

val many : (unit -> 'a) -> unit -> 'a list
(* Collect every leading digit of the input; yields [] when there is none. *)
let digits () =
  let all_digits = Parseff.many Parseff.digit in
  all_digits ()
(* "123"  -> [1; 2; 3]  *)
(* ""     -> []         *)
(* "abc"  -> []         *)

many1

Parseff.many1 is like Parseff.many but requires at least one match. Fails if the parser doesn't succeed at least once.

val many1 : (unit -> 'a) -> unit -> 'a list
(* Same as a plain digit run, except that at least one digit is mandatory. *)
let digits1 () =
  let some_digits = Parseff.many1 Parseff.digit in
  some_digits ()
(* "123" -> [1; 2; 3]  *)
(* ""    -> Error      *)
(* "abc" -> Error      *)

count

Parseff.count applies a parser exactly n times. Fails if the parser doesn't match n times.

val count : int -> (unit -> 'a) -> unit -> 'a list
(* Read a fixed-width group of exactly three digits. *)
let three_digits () =
  let width = 3 in
  Parseff.count width Parseff.digit ()
(* "123"  -> [1; 2; 3]  *)
(* "12"   -> Error      *)

Useful for fixed-width formats:

(* Accept a single hexadecimal digit, lower- or upper-case. *)
let hex_digit () =
  let is_hex = function
    | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
    | _ -> false
  in
  Parseff.satisfy is_hex ~label:"hex digit"

(* Parse a #RRGGBB color: a '#' followed by three two-digit hex channels.
   The channels are bound one after another with [let] so that they are
   read from the input in red, green, blue order. *)
let hex_color () =
  let channel () = Parseff.count 2 hex_digit () in
  ignore (Parseff.char '#');
  let red = channel () in
  let green = channel () in
  let blue = channel () in
  (red, green, blue)
(* "#ff00aa" -> (['f';'f'], ['0';'0'], ['a';'a']) *)

Separated lists

sep_by

Parseff.sep_by parses zero or more elements separated by a separator. The separator's return value is discarded. Always succeeds.

val sep_by : (unit -> 'a) -> (unit -> 'b) -> unit -> 'a list
(* Split one line into comma-separated fields. A field is whatever precedes
   the next comma or newline, and may be empty (see the "" example below). *)
let csv_line () =
  let is_field_char c = not (c = ',' || c = '\n') in
  let field () = Parseff.take_while is_field_char in
  let comma () = Parseff.char ',' in
  Parseff.sep_by field comma ()
(* "a,b,c" -> ["a"; "b"; "c"] *)
(* ""      -> [""]             *)

sep_by1

Parseff.sep_by1 is like Parseff.sep_by but requires at least one element.

val sep_by1 : (unit -> 'a) -> (unit -> 'b) -> unit -> 'a list
(* Comma-separated fields where every field must be non-empty and at least
   one field must be present. *)
let csv_line1 () =
  let is_value_char c = not (c = ',' || c = '\n') in
  let value () = Parseff.take_while1 is_value_char ~label:"value" in
  let comma () = Parseff.char ',' in
  Parseff.sep_by1 value comma ()
(* "a,b,c" -> ["a"; "b"; "c"] *)
(* "a"     -> ["a"]           *)
(* ""      -> Error            *)

Delimiters and terminators

between

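Parseff.between parses an opening delimiter, then the body, then a closing delimiter. Returns the body's value. Note the argument order: the opening and closing parsers come first, and the body parser is passed last.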

val between : (unit -> 'a) -> (unit -> 'b) -> (unit -> 'c) -> unit -> 'c
(* Wrap parser [p] so that it must appear between '(' and ')'. *)
let parens p =
  let opening () = Parseff.char '(' in
  let closing () = Parseff.char ')' in
  Parseff.between opening closing p

(* Parse exactly one digit wrapped in parentheses, allowing whitespace on
   either side of the digit. [Parseff.digit] consumes a single digit, so a
   multi-digit body such as "42" is rejected: after the first digit the
   closing ')' is expected. *)
let parenthesized_digit () =
  parens
    (fun () ->
      Parseff.skip_whitespace ();
      let n = Parseff.digit () in
      Parseff.skip_whitespace ();
      n)
    ()
(* "(4)"   -> 4 *)
(* "( 4 )" -> 4 *)
(* "(42)"  -> Error (only parses one digit) *)

Works well for bracketed structures:

(* Wrap parser [p] so that it must appear between '{' and '}'. *)
let braces p =
  let opening () = Parseff.char '{' in
  let closing () = Parseff.char '}' in
  Parseff.between opening closing p

(* Wrap parser [p] so that it must appear between '[' and ']'. *)
let brackets p =
  let opening () = Parseff.char '[' in
  let closing () = Parseff.char ']' in
  Parseff.between opening closing p

end_by

Parseff.end_by parses zero or more elements, each followed by a separator. Unlike Parseff.sep_by, the separator comes after each element (including the last).

val end_by : (unit -> 'a) -> (unit -> 'b) -> unit -> 'a list
(* Parse a run of statements, each of which must be closed by a ';'.
   A statement is any non-empty text free of ';' and newlines. *)
let statements () =
  let is_statement_char c = not (c = ';' || c = '\n') in
  let statement () =
    Parseff.take_while1 is_statement_char ~label:"statement"
  in
  let semicolon () = Parseff.char ';' in
  Parseff.end_by statement semicolon ()
(* "a;b;c;" -> ["a"; "b"; "c"] *)
(* ""       -> []               *)

end_by1

Parseff.end_by1 is like Parseff.end_by but requires at least one element.

val end_by1 : (unit -> 'a) -> (unit -> 'b) -> unit -> 'a list

Operator chains

These combinators parse sequences of values joined by operators, handling associativity. They're the standard tool for expression parsing with operator precedence.

chainl1

Parseff.chainl1 parses one or more values separated by an operator, combining them left-associatively. The operator parser returns a function that combines two values.

val chainl1 : (unit -> 'a) -> (unit -> 'a -> 'a -> 'a) -> unit -> 'a
(* Parse "1-2-3" as ((1-2)-3) = -4 *)
(* Single-digit operands joined by '-', folded from the left. The operator
   parser consumes the '-' and hands back integer subtraction. *)
let subtraction () =
  let operand () = Parseff.digit () in
  let minus () =
    ignore (Parseff.char '-');
    ( - )
  in
  Parseff.chainl1 operand minus ()
(* "1-2-3" -> -4  (left-associative: (1-2)-3) *)

chainr1

Parseff.chainr1 is like Parseff.chainl1 but combines right-associatively.

val chainr1 : (unit -> 'a) -> (unit -> 'a -> 'a -> 'a) -> unit -> 'a
(* Parse "2^3^2" as 2^(3^2) = 512 *)
(* Single-digit operands joined by '^', folded from the right. The
   exponentiation goes through floats and is truncated back to an int. *)
let power () =
  let int_pow base exponent =
    int_of_float (float_of_int base ** float_of_int exponent)
  in
  let operand () = Parseff.digit () in
  let caret () =
    ignore (Parseff.char '^');
    int_pow
  in
  Parseff.chainr1 operand caret ()
(* "2^3^2" -> 512  (right-associative: 2^(3^2)) *)

chainl

Parseff.chainl is like Parseff.chainl1 but takes a default value. Returns the default if zero elements match.

val chainl : (unit -> 'a) -> (unit -> 'a -> 'a -> 'a) -> 'a -> unit -> 'a
(* Left-associative subtraction chain that falls back to 0 when the input
   contains no operand at all. *)
let maybe_subtract () =
  let fallback = 0 in
  let operand () = Parseff.digit () in
  let minus () =
    ignore (Parseff.char '-');
    ( - )
  in
  Parseff.chainl operand minus fallback ()
(* "1-2" -> -1 *)
(* ""    -> 0  *)

chainr

Parseff.chainr is like Parseff.chainr1 but with a default value for zero matches.

val chainr : (unit -> 'a) -> (unit -> 'a -> 'a -> 'a) -> 'a -> unit -> 'a

Complete example: JSON array

(* Parse an optionally negated decimal integer: an optional '-' followed by
   one or more ASCII digits, converted with [int_of_string]. *)
let integer () =
  let minus = Parseff.optional (fun () -> Parseff.char '-') () in
  let is_digit = function '0' .. '9' -> true | _ -> false in
  let text = Parseff.take_while1 is_digit ~label:"digit" in
  let magnitude = int_of_string text in
  match minus with
  | None -> magnitude
  | Some _ -> -magnitude

(* Parse a bracketed, comma-separated list of integers with optional
   whitespace around every element, and require that nothing follows the
   closing ']'. Returns the integers in input order. *)
let json_array () =
  (* One element: an integer with whitespace allowed on both sides. *)
  let padded_integer () =
    Parseff.skip_whitespace ();
    let value = integer () in
    Parseff.skip_whitespace ();
    value
  in
  let comma () = Parseff.char ',' in
  ignore (Parseff.char '[');
  Parseff.skip_whitespace ();
  let values = Parseff.sep_by padded_integer comma () in
  Parseff.skip_whitespace ();
  ignore (Parseff.char ']');
  Parseff.end_of_input ();
  values

(* Run the array parser on a sample input and print either the sum of the
   parsed numbers or a description of the failure. *)
let () =
  let input = "[1, -2, 3]" in
  match Parseff.parse input json_array with
  | Ok nums ->
      let total = List.fold_left ( + ) 0 nums in
      Printf.printf "Sum: %d\n" total
  | Error { pos; error = `Expected msg } ->
      Printf.printf "Error at %d: %s\n" pos msg
  | Error _ -> print_endline "Parse error"