package catala

  1. Overview
  2. Docs
Legend:
Library
Module
Module type
Parameter
Class
Class type
module Runtime = Runtime_ocaml.Runtime
module ScopeName : sig ... end
module TopdefName : sig ... end
module StructName : sig ... end
module StructField : sig ... end
module EnumName : sig ... end
module EnumConstructor : sig ... end

Only used by surface

module RuleName : sig ... end
module LabelName : sig ... end

Used for unresolved structs/maps in desugared

module IdentName = Catala_utils.String

Only used by desugared/scopelang

module ScopeVar : sig ... end
module SubScopeName : sig ... end
module StateName : sig ... end

Abstract syntax tree

Define a common base type for the expressions in most passes of the compiler

type desugared = [
  1. | `Desugared
]

Phantom types used to select relevant cases on the generic AST

We instantiate them with a polymorphic variant to take advantage of sub-typing. The values aren't actually used.

type scopelang = [
  1. | `Scopelang
]
type dcalc = [
  1. | `Dcalc
]
type lcalc = [
  1. | `Lcalc
]
type 'a any = [< desugared | scopelang | dcalc | lcalc ] as 'a

'a any is 'a, but adds the constraint that it should be restricted to valid AST kinds

Types

type typ_lit =
  1. | TBool
  2. | TUnit
  3. | TInt
  4. | TRat
  5. | TMoney
  6. | TDate
  7. | TDuration
and naked_typ =
  1. | TLit of typ_lit
  2. | TTuple of typ list
  3. | TStruct of StructName.t
  4. | TEnum of EnumName.t
  5. | TOption of typ
  6. | TArrow of typ list * typ
  7. | TArray of typ
  8. | TAny

Constants and operators

type date = Runtime.date
type duration = Runtime.duration
type log_entry =
  1. | VarDef of naked_typ
    (*

    During code generation, we need to know the type of the variable being logged for embedding

    *)
  2. | BeginCall
  3. | EndCall
  4. | PosRecordIfTrueBool
module Op : sig ... end

Classification of operators on how they should be typed

type ('a, 'k) operator = ('a any, 'k) Op.t
type except =
  1. | ConflictError
  2. | EmptyError
  3. | NoValueProvided
  4. | Crash

Generic expressions

Define a common base type for the expressions in most passes of the compiler

type 'a glit =
  1. | LBool : bool -> 'a glit
  2. | LEmptyError : [< desugared | scopelang | dcalc ] glit
  3. | LInt : Runtime.integer -> 'a glit
  4. | LRat : Runtime.decimal -> 'a glit
  5. | LMoney : Runtime.money -> 'a glit
  6. | LUnit : 'a glit
  7. | LDate : date -> 'a glit
  8. | LDuration : duration -> 'a glit

Literals are the same throughout compilation except for the LEmptyError case which is eliminated midway through.

Locations are handled differently in desugared and scopelang

type ('a, 't) gexpr = (('a, 't) naked_gexpr, 't) Catala_utils.Marked.t

General expressions: groups all expression cases of the different ASTs, and uses a GADT to eliminate irrelevant cases for each one. The 't annotations are also totally unconstrained at this point. The dcalc exprs, for example, are then defined with type naked_expr = dcalc naked_gexpr plus the annotations.

A few tips on using this GADT:

  • To write a function that handles cases from different ASTs, make the type variables explicit: fun (type a) (x: a naked_gexpr) -> ...
  • For recursive functions, you may additionally need to make the generalisation of the variable explicit: let rec f: type a . a naked_gexpr -> ...
  • Always think of using the pre-defined map/fold functions in Expr rather than completely defining your recursion manually.
and ('a, 't) naked_gexpr =
  1. | ELit : 'a glit -> ('a any, 't) naked_gexpr
  2. | EApp : {
    1. f : ('a, 't) gexpr;
    2. args : ('a, 't) gexpr list;
    } -> ('a any, 't) naked_gexpr
  3. | EOp : {
    1. op : ('a, _) operator;
    2. tys : typ list;
    } -> ('a any, 't) naked_gexpr
  4. | EArray : ('a, 't) gexpr list -> ('a any, 't) naked_gexpr
  5. | EVar : ('a, 't) naked_gexpr Bindlib.var -> ('a any, 't) naked_gexpr
  6. | EAbs : {
    1. binder : (('a, 't) naked_gexpr, ('a, 't) gexpr) Bindlib.mbinder;
    2. tys : typ list;
    } -> ('a any, 't) naked_gexpr
  7. | EIfThenElse : {
    1. cond : ('a, 't) gexpr;
    2. etrue : ('a, 't) gexpr;
    3. efalse : ('a, 't) gexpr;
    } -> ('a any, 't) naked_gexpr
  8. | EStruct : {
    1. name : StructName.t;
    2. fields : ('a, 't) gexpr StructField.Map.t;
    } -> ('a any, 't) naked_gexpr
  9. | EInj : {
    1. name : EnumName.t;
    2. e : ('a, 't) gexpr;
    3. cons : EnumConstructor.t;
    } -> ('a any, 't) naked_gexpr
  10. | EMatch : {
    1. name : EnumName.t;
    2. e : ('a, 't) gexpr;
    3. cases : ('a, 't) gexpr EnumConstructor.Map.t;
    } -> ('a any, 't) naked_gexpr
  11. | ETuple : ('a, 't) gexpr list -> ('a any, 't) naked_gexpr
  12. | ETupleAccess : {
    1. e : ('a, 't) gexpr;
    2. index : int;
    3. size : int;
    } -> ('a any, 't) naked_gexpr
  13. | ELocation : 'a glocation -> ([< desugared | scopelang ] as 'a, 't) naked_gexpr
  14. | EScopeCall : {
    1. scope : ScopeName.t;
    2. args : ('a, 't) gexpr ScopeVar.Map.t;
    } -> ([< desugared | scopelang ] as 'a, 't) naked_gexpr
  15. | EDStructAccess : {
    1. name_opt : StructName.t option;
    2. e : ('a, 't) gexpr;
    3. field : IdentName.t;
    } -> (desugared as 'a, 't) naked_gexpr
    (*

    desugared has ambiguous struct fields

    *)
  16. | EStructAccess : {
    1. name : StructName.t;
    2. e : ('a, 't) gexpr;
    3. field : StructField.t;
    } -> ([< scopelang | dcalc | lcalc ] as 'a, 't) naked_gexpr
    (*

    Resolved struct/enums, after desugared

    *)
  17. | EAssert : ('a, 't) gexpr -> ([< dcalc | lcalc ] as 'a, 't) naked_gexpr
  18. | EDefault : {
    1. excepts : ('a, 't) gexpr list;
    2. just : ('a, 't) gexpr;
    3. cons : ('a, 't) gexpr;
    } -> ([< desugared | scopelang | dcalc ] as 'a, 't) naked_gexpr
  19. | EErrorOnEmpty : ('a, 't) gexpr -> ([< desugared | scopelang | dcalc ] as 'a, 't) naked_gexpr
  20. | ERaise : except -> (lcalc as 'a, 't) naked_gexpr
  21. | ECatch : {
    1. body : ('a, 't) gexpr;
    2. exn : except;
    3. handler : ('a, 't) gexpr;
    } -> (lcalc as 'a, 't) naked_gexpr
type ('a, 't) boxed_gexpr = (('a, 't) naked_gexpr Bindlib.box, 't) Catala_utils.Marked.t

The annotation is lifted outside of the box for expressions

type 'e boxed = ('a, 't) boxed_gexpr constraint 'e = ('a, 't) gexpr

('a, 't) gexpr boxed is ('a, 't) boxed_gexpr. The difference with ('a, 't) gexpr Bindlib.box is that the annotation is outside of the box, and can therefore be accessed without the need to resolve the box.

type ('e, 'b) binder = (('a, 't) naked_gexpr, 'b) Bindlib.binder constraint 'e = ('a, 't) gexpr

The expressions use the Bindlib library, based on higher-order abstract syntax

type ('e, 'b) mbinder = (('a, 't) naked_gexpr, 'b) Bindlib.mbinder constraint 'e = ('a, 't) gexpr

Markings

type untyped = {
  1. pos : Catala_utils.Pos.t;
}
type typed = {
  1. pos : Catala_utils.Pos.t;
  2. ty : typ;
}
type _ mark =
  1. | Untyped : untyped -> untyped mark
  2. | Typed : typed -> typed mark

The generic type of AST markings. Using a GADT allows functions to be polymorphic in the marking, but still do transformations on types when appropriate. Expected to fill the 't parameter of gexpr (a 't annotation different from this type is used in the middle of the typing process, but all visible ASTs should otherwise use this).

type any_expr =
  1. | AnyExpr : (_, _ mark) gexpr -> any_expr

Useful for errors and printing, for example

Higher-level program structure

Constructs scopes and programs on top of expressions. The 'e type parameter throughout is expected to match instances of the gexpr type defined above. Markings are constrained to the mark GADT defined above. Note that this structure is at the moment only relevant for dcalc and lcalc, as scopelang has its own scope structure, as the name implies.

type scope_let_kind =
  1. | DestructuringInputStruct
    (*

    let x = input.field

    *)
  2. | ScopeVarDefinition
    (*

    let x = error_on_empty e

    *)
  3. | SubScopeVarDefinition
    (*

    let s.x = fun _ -> e or let s.x = error_on_empty e for input-only subscope variables.

    *)
  4. | CallingSubScope
    (*

    let result = s ({ x = s.x; y = s.x; ...})

    *)
  5. | DestructuringSubScopeResults
    (*

    let s.x = result.x

    *)
  6. | Assertion
    (*

    let _ = assert e

    *)

This kind annotation signals that the let-binding respects a structural invariant. These invariants concern the shape of the expression in the let-binding, and are documented below.

type 'e scope_let = {
  1. scope_let_kind : scope_let_kind;
  2. scope_let_typ : typ;
  3. scope_let_expr : 'e;
  4. scope_let_next : ('e, 'e scope_body_expr) binder;
  5. scope_let_pos : Catala_utils.Pos.t;
} constraint 'e = (_ any, _ mark) gexpr

This type is parametrized by the expression type so it can be reused in later intermediate representations.

and 'e scope_body_expr =
  1. | Result of 'e
  2. | ScopeLet of 'e scope_let
constraint 'e = (_ any, _ mark) gexpr

A scope let-binding has all the information necessary to make a proper let-binding expression, plus an annotation for the kind of the let-binding that comes from the compilation of a Scopelang.Ast statement.

type 'e scope_body = {
  1. scope_body_input_struct : StructName.t;
  2. scope_body_output_struct : StructName.t;
  3. scope_body_expr : ('e, 'e scope_body_expr) binder;
} constraint 'e = (_ any, _ mark) gexpr

Instead of being a single expression, we give a little more ad-hoc structure to the scope body by decomposing it in an ordered list of let-bindings, and a result expression that uses the let-bound variables. The first binder is the argument of type scope_body_input_struct.

type 'e code_item =
  1. | ScopeDef of ScopeName.t * 'e scope_body
  2. | Topdef of TopdefName.t * typ * 'e
type 'e code_item_list =
  1. | Nil
  2. | Cons of 'e code_item * ('e, 'e code_item_list) binder
type scope_out_struct = {
  1. out_struct_name : StructName.t;
  2. out_struct_fields : StructField.t ScopeVar.Map.t;
}
type decl_ctx = {
  1. ctx_enums : enum_ctx;
  2. ctx_structs : struct_ctx;
  3. ctx_struct_fields : StructField.t StructName.Map.t IdentName.Map.t;
    (*

    needed for disambiguation (desugared -> scope)

    *)
  4. ctx_scopes : scope_out_struct ScopeName.Map.t;
}
type 'e program = {
  1. decl_ctx : decl_ctx;
  2. code_items : 'e code_item_list;
}
module Var : sig ... end
module Type : sig ... end
module Operator : sig ... end
module Expr : sig ... end

Functions handling the expressions of shared_ast

module Scope : sig ... end

Functions handling the code item structures of shared_ast, in particular the scopes

module Program : sig ... end
module Print : sig ... end

Printing functions for the default calculus AST

module Typing : sig ... end

Typing for the default calculus. Because of the error terms, we perform type inference using the classical W algorithm with union-find unification.