package biocaml

  1. Overview
  2. Docs
Legend:
Library
Module
Module type
Parameter
Class
Class type
include module type of struct include Biocaml_unix.Sam end

Types

SAM files. Documentation here assumes familiarity with the SAM specification.

Header Types

type header_item_tag = private [<
  1. | `HD
  2. | `SQ
  3. | `RG
  4. | `PG
  5. | `CO
  6. | `Other of string
]

Header item tags define the different types of header lines. The term "tag" in this context should not be confused with its use in "tag-value" pairs, which comprise the content of header items.

include sig ... end
val header_item_tag_of_sexp : Sexplib.Sexp.t -> header_item_tag
val __header_item_tag_of_sexp__ : Sexplib.Sexp.t -> header_item_tag
val sexp_of_header_item_tag : header_item_tag -> Sexplib.Sexp.t
type tag_value = private string * string

A tag-value pair comprising the content of header items. Tag-value pairs occur in other places too, but this type is specifically for those in the header.

include sig ... end
val tag_value_of_sexp : Sexplib.Sexp.t -> tag_value
val sexp_of_tag_value : tag_value -> Sexplib.Sexp.t
type sort_order = [
  1. | `Unknown
  2. | `Unsorted
  3. | `Query_name
  4. | `Coordinate
]
include sig ... end
val sort_order_of_sexp : Sexplib.Sexp.t -> sort_order
val __sort_order_of_sexp__ : Sexplib.Sexp.t -> sort_order
val sexp_of_sort_order : sort_order -> Sexplib.Sexp.t
type group_order = [
  1. | `None
  2. | `Query
  3. | `Reference
]
include sig ... end
val group_order_of_sexp : Sexplib.Sexp.t -> group_order
val __group_order_of_sexp__ : Sexplib.Sexp.t -> group_order
val sexp_of_group_order : group_order -> Sexplib.Sexp.t
type header_line = private Biocaml_unix.Sam.header_line = {
  1. version : string;
    (*

    VN

    *)
  2. sort_order : sort_order option;
    (*

    SO

    *)
  3. group_order : group_order option;
    (*

    GO

    *)
}

@HD. A header consists of different types of lines. Confusingly, one of these types is called the "header line", which is what this type refers to. It does not refer generically to any line within a header.

include sig ... end
val header_line_of_sexp : Sexplib.Sexp.t -> header_line
val sexp_of_header_line : header_line -> Sexplib.Sexp.t
type ref_seq = private Biocaml_unix.Sam.ref_seq = {
  1. name : string;
    (*

    SN

    *)
  2. length : int;
    (*

    LN

    *)
  3. assembly : string option;
    (*

    AS

    *)
  4. md5 : string option;
    (*

    M5

    *)
  5. species : string option;
    (*

    SP

    *)
  6. uri : string option;
    (*

    UR

    *)
}

@SQ. Reference sequence.

include sig ... end
val ref_seq_of_sexp : Sexplib.Sexp.t -> ref_seq
val sexp_of_ref_seq : ref_seq -> Sexplib.Sexp.t
type platform = [
  1. | `Capillary
  2. | `LS454
  3. | `Illumina
  4. | `Solid
  5. | `Helicos
  6. | `Ion_Torrent
  7. | `Pac_Bio
]
include sig ... end
val platform_of_sexp : Sexplib.Sexp.t -> platform
val __platform_of_sexp__ : Sexplib.Sexp.t -> platform
val sexp_of_platform : platform -> Sexplib.Sexp.t
type read_group = private Biocaml_unix.Sam.read_group = {
  1. id : string;
    (*

    ID

    *)
  2. seq_center : string option;
    (*

    CN

    *)
  3. description : string option;
    (*

    DS

    *)
  4. run_date : [ `Date of string | `Time of string ] option;
    (*

    DT

    *)
  5. flow_order : string option;
    (*

    FO

    *)
  6. key_seq : string option;
    (*

    KS

    *)
  7. library : string option;
    (*

    LB

    *)
  8. program : string option;
    (*

    PG

    *)
  9. predicted_median_insert_size : int option;
    (*

    PI

    *)
  10. platform : platform option;
    (*

    PL

    *)
  11. platform_unit : string option;
    (*

    PU

    *)
  12. sample : string option;
    (*

    SM

    *)
}

@RG.

include sig ... end
val read_group_of_sexp : Sexplib.Sexp.t -> read_group
val sexp_of_read_group : read_group -> Sexplib.Sexp.t
type program = private Biocaml_unix.Sam.program = {
  1. id : string;
    (*

    ID

    *)
  2. name : string option;
    (*

    PN

    *)
  3. command_line : string option;
    (*

    CL

    *)
  4. previous_id : string option;
    (*

    PP

    *)
  5. description : string option;
    (*

    DS

    *)
  6. version : string option;
    (*

    VN

    *)
}

@PG.

include sig ... end
val program_of_sexp : Sexplib.Sexp.t -> program
val sexp_of_program : program -> Sexplib.Sexp.t
type header_item = private [<
  1. | `HD of header_line
  2. | `SQ of ref_seq
  3. | `RG of read_group
  4. | `PG of program
  5. | `CO of string
  6. | `Other of string * tag_value list
]
include sig ... end
val header_item_of_sexp : Sexplib.Sexp.t -> header_item
val __header_item_of_sexp__ : Sexplib.Sexp.t -> header_item
val sexp_of_header_item : header_item -> Sexplib.Sexp.t
type header = private Biocaml_unix.Sam.header = {
  1. version : string option;
  2. sort_order : sort_order option;
  3. group_order : group_order option;
  4. ref_seqs : ref_seq list;
  5. read_groups : read_group list;
  6. programs : program list;
  7. comments : string list;
  8. others : (string * tag_value list) list;
}
  • sort_order: Guaranteed to be None if version = None.
  • ref_seqs: List of @SQ items. Order matters; it dictates alignment sorting order when sort_order = `Coordinate.
  • read_groups: Unordered list of @RG items.
  • programs: List of @PG lines. Currently unordered, although ideally they would be topologically sorted.
  • comments: Unordered list of @CO lines.
val empty_header : header
module Flags = Biocaml_unix.Sam.Flags
type cigar_op = private [<
  1. | `Alignment_match of int
  2. | `Insertion of int
  3. | `Deletion of int
  4. | `Skipped of int
  5. | `Soft_clipping of int
  6. | `Hard_clipping of int
  7. | `Padding of int
  8. | `Seq_match of int
  9. | `Seq_mismatch of int
]

CIGAR operations.

include sig ... end
val cigar_op_of_sexp : Sexplib.Sexp.t -> cigar_op
val __cigar_op_of_sexp__ : Sexplib.Sexp.t -> cigar_op
val sexp_of_cigar_op : cigar_op -> Sexplib.Sexp.t
type optional_field_value = private [<
  1. | `A of char
  2. | `i of Core_kernel.Int64.t
  3. | `f of float
  4. | `Z of string
  5. | `H of string
  6. | `B of char * string list
]

Each constructor encodes the TYPE and carries the corresponding VALUE.

include sig ... end
val optional_field_value_of_sexp : Sexplib.Sexp.t -> optional_field_value
val __optional_field_value_of_sexp__ : Sexplib.Sexp.t -> optional_field_value
val sexp_of_optional_field_value : optional_field_value -> Sexplib.Sexp.t
type optional_field = private Biocaml_unix.Sam.optional_field = {
  1. tag : string;
  2. value : optional_field_value;
}
include sig ... end
val optional_field_of_sexp : Sexplib.Sexp.t -> optional_field
val sexp_of_optional_field : optional_field -> Sexplib.Sexp.t
type rnext = private [<
  1. | `Value of string
  2. | `Equal_to_RNAME
]
include sig ... end
val rnext_of_sexp : Sexplib.Sexp.t -> rnext
val __rnext_of_sexp__ : Sexplib.Sexp.t -> rnext
val sexp_of_rnext : rnext -> Sexplib.Sexp.t
type alignment = private Biocaml_unix.Sam.alignment = {
  1. qname : string option;
    (*

    QNAME

    *)
  2. flags : Flags.t;
    (*

    FLAG

    *)
  3. rname : string option;
    (*

    RNAME

    *)
  4. pos : int option;
    (*

    POS

    *)
  5. mapq : int option;
    (*

    MAPQ

    *)
  6. cigar : cigar_op list;
    (*

    CIGAR

    *)
  7. rnext : rnext option;
    (*

    RNEXT

    *)
  8. pnext : int option;
    (*

    PNEXT

    *)
  9. tlen : int option;
    (*

    TLEN

    *)
  10. seq : string option;
    (*

    SEQ

    *)
  11. qual : Biocaml_unix.Phred_score.t list;
    (*

    QUAL

    *)
  12. optional_fields : optional_field list;
}

For cigar and qual, an empty list indicates that no value was given, i.e. the field was '*'.

include sig ... end
val alignment_of_sexp : Sexplib.Sexp.t -> alignment
val sexp_of_alignment : alignment -> Sexplib.Sexp.t
module MakeIO = Biocaml_unix.Sam.MakeIO

Low-level Parsers and Constructors

val header_line : version:string -> ?sort_order:sort_order -> ?group_order:group_order -> unit -> header_line Core_kernel.Or_error.t

Low-level Header Parsers and Constructors

val ref_seq : name:string -> length:int -> ?assembly:string -> ?md5:string -> ?species:string -> ?uri:string -> unit -> ref_seq Core_kernel.Or_error.t
val read_group : id:string -> ?seq_center:string -> ?description:string -> ?run_date:string -> ?flow_order:string -> ?key_seq:string -> ?library:string -> ?program:string -> ?predicted_median_insert_size:int -> ?platform:platform -> ?platform_unit:string -> ?sample:string -> unit -> read_group Core_kernel.Or_error.t

The run_date string will be parsed as a Date.t or Time.t, whichever is possible. If it is a time without a timezone, the local timezone will be assumed.

val header : ?version:string -> ?sort_order:sort_order -> ?group_order:group_order -> ?ref_seqs:ref_seq list -> ?read_groups:read_group list -> ?programs:program list -> ?comments:string list -> ?others:(string * tag_value list) list -> unit -> header Core_kernel.Or_error.t
val parse_header_item_tag : string -> header_item_tag Core_kernel.Or_error.t
val parse_tag_value : string -> tag_value Core_kernel.Or_error.t
val parse_header_version : string -> string Core_kernel.Or_error.t
val parse_sort_order : string -> sort_order Core_kernel.Or_error.t
val parse_header_line : tag_value list -> header_line Core_kernel.Or_error.t
val parse_ref_seq : tag_value list -> ref_seq Core_kernel.Or_error.t
val parse_platform : string -> platform Core_kernel.Or_error.t
val parse_read_group : tag_value list -> read_group Core_kernel.Or_error.t
val parse_program : tag_value list -> program Core_kernel.Or_error.t
val parse_header : string -> header Core_kernel.Or_error.t
val cigar_op_alignment_match : int -> cigar_op Core_kernel.Or_error.t

Low-level CIGAR Parsers and Constructors

val cigar_op_insertion : int -> cigar_op Core_kernel.Or_error.t
val cigar_op_deletion : int -> cigar_op Core_kernel.Or_error.t
val cigar_op_skipped : int -> cigar_op Core_kernel.Or_error.t
val cigar_op_soft_clipping : int -> cigar_op Core_kernel.Or_error.t
val cigar_op_hard_clipping : int -> cigar_op Core_kernel.Or_error.t
val cigar_op_padding : int -> cigar_op Core_kernel.Or_error.t
val cigar_op_seq_match : int -> cigar_op Core_kernel.Or_error.t
val cigar_op_seq_mismatch : int -> cigar_op Core_kernel.Or_error.t
val optional_field_value_A : char -> optional_field_value Core_kernel.Or_error.t

Low-level Optional field Parsers and Constructors

val optional_field_value_i : Core_kernel.Int64.t -> optional_field_value
val optional_field_value_f : float -> optional_field_value
val optional_field_value_Z : string -> optional_field_value Core_kernel.Or_error.t
val optional_field_value_H : string -> optional_field_value Core_kernel.Or_error.t
val optional_field_value_B : char -> string list -> optional_field_value Core_kernel.Or_error.t
val parse_optional_field_value : string -> optional_field_value Core_kernel.Or_error.t
val parse_optional_field : string -> optional_field Core_kernel.Or_error.t
val alignment : ?ref_seqs:Core_kernel.String.Set.t -> ?qname:string -> flags:Flags.t -> ?rname:string -> ?pos:int -> ?mapq:int -> ?cigar:cigar_op list -> ?rnext:rnext -> ?pnext:int -> ?tlen:int -> ?seq:string -> ?qual:Biocaml_unix.Phred_score.t list -> ?optional_fields:optional_field list -> unit -> alignment Core_kernel.Or_error.t

Low-level Alignment Parsers and Constructors

val parse_qname : string -> string option Core_kernel.Or_error.t
val parse_flags : string -> Flags.t Core_kernel.Or_error.t
val parse_rname : string -> string option Core_kernel.Or_error.t
val parse_pos : string -> int option Core_kernel.Or_error.t
val parse_mapq : string -> int option Core_kernel.Or_error.t
val parse_cigar : string -> cigar_op list Core_kernel.Or_error.t
val parse_rnext : string -> rnext option Core_kernel.Or_error.t
val parse_pnext : string -> int option Core_kernel.Or_error.t
val parse_tlen : string -> int option Core_kernel.Or_error.t
val parse_seq : string -> string option Core_kernel.Or_error.t
val parse_qual : string -> Biocaml_unix.Phred_score.t list Core_kernel.Or_error.t

Low-level Printers

val print_header_item_tag : header_item_tag -> string

Low-level Header Printers

val print_tag_value : tag_value -> string
val print_header_version : string -> string
val print_sort_order : sort_order -> string
val print_header_line : header_line -> string
val print_ref_seq : ref_seq -> string
val print_platform : platform -> string
val print_read_group : read_group -> string
val print_program : program -> string
val print_other : (string * tag_value list) -> string
val print_qname : string option -> string

Low-level Alignment Printers

val print_flags : Flags.t -> string
val print_rname : string option -> string
val print_pos : int option -> string
val print_mapq : int option -> string
val print_cigar_op : cigar_op -> string
val print_cigar : cigar_op list -> string
val print_rnext : rnext option -> string
val print_pnext : int option -> string
val print_tlen : int option -> string
val print_seq : string option -> string
val print_qual : Biocaml_unix.Phred_score.t list -> string
val print_optional_field : optional_field -> string
val print_alignment : alignment -> string
OCaml

Innovation. Community. Security.