package biocaml

You can search for identifiers within the package.

in-package search v0.2.0

biocaml.base
- Biocaml_base
  - Bed
    
    Bed3
    
    Bed4
    
    Bed5
    
    Bed5_raw
  - Fasta
    
    Parser
    
    Parser0
  - Gff
  - Line
  - Lines
    
    Parser
  - Macs2
    
    Broad_peaks
    
    Xls
  - Table
    
    Field
  - Ucsc_genome_browser
biocaml.ez
- Biocaml_ez
  - Bam
  - Bamstats
  - Fasta
  - Fastq
    
    Illumina
  - Lines
    
    Buffer
    
    MakeIO
    
    Future
    
    Deferred
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    List
    
    Monad_infix
    
    Or_error
    
    List
    
    In_thread
    
    Pipe
    
    Reader
    
    Reader
    
    Read_result
    
    Unix
    
    Writer
    
    Transform
  - Phred_score
  - Range
  - Roman_num
  - Sam
  - Seq_range
  - Strand
biocaml.unix
- Biocaml_unix
  - Accu
    
    Counter
    
    Relation
  - Bam
    
    Alignment0
    
    Header
  - Bamstats
    
    Chr_histogram
    
    Fragment_length_histogram
  - Bar
  - Bed
    
    Error
    
    Transform
  - Bgzf
  - Bin_pred
  - Biocaml_result
    
    Export
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    List
    
    Monad_infix
    
    Stable
    
    V1
    
    V1_stable_unit_test
  - Bpmap
  - Cel
  - Chr
    
    Error
  - Entrez
    
    Fetch
    
    Make
    
    Dbtag
    
    F
    
    Gene
    
    Gene_ref
    
    Object_id
    
    Pubmed
    
    PubmedSummary
  - Fasta
  - Fastq
    
    Illumina
    
    MakeIO
    
    Future
    
    Deferred
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    List
    
    Monad_infix
    
    Or_error
    
    List
    
    In_thread
    
    Pipe
    
    Reader
    
    Reader
    
    Read_result
    
    Unix
    
    Writer
  - File_mapper
  - Future
    
    S
    
    Deferred
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    List
    
    Monad_infix
    
    Or_error
    
    List
    
    In_thread
    
    Pipe
    
    Reader
    
    Reader
    
    Read_result
    
    Unix
    
    Writer
  - Future_unix
    
    Deferred
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    List
    
    Monad_infix
    
    Or_error
    
    List
    
    In_thread
    
    Pipe
    
    Reader
    
    Reader
    
    Read_result
    
    Unix
    
    Writer
  - GenomeMap
    
    Chromosome
    
    Make
    
    Chromosome
    
    LMap
    
    LSet
    
    Selection
    
    Signal
  - Gff
    
    Error
    
    Tags
    
    Transform
  - Histogram
  - Interval_tree
  - Iset
  - Jaspar
  - Line
  - Lines
    
    Buffer
    
    MakeIO
    
    Future
    
    Deferred
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    List
    
    Monad_infix
    
    Or_error
    
    List
    
    In_thread
    
    Pipe
    
    Reader
    
    Reader
    
    Read_result
    
    Unix
    
    Writer
    
    Transform
  - Math
  - Msg
    
    Tree
  - MzData
    
    Precursor
  - Phred_score
  - Pos
  - Psl
    
    Error
    
    Transform
  - Pwm
  - RSet
  - Range
  - Roman_num
  - Sam
    
    Flags
    
    MakeIO
    
    Future
    
    Deferred
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    List
    
    Monad_infix
    
    Or_error
    
    List
    
    In_thread
    
    Pipe
    
    Reader
    
    Reader
    
    Read_result
    
    Unix
    
    Writer
  - Sbml
  - Seq
  - Seq_range
    
    Identifier
    
    Map
    
    Key
    
    Provide_bin_io
    
    Key
    
    Provide_hash
    
    Key
    
    Provide_of_sexp
    
    Key
    
    Tree
    
    Provide_of_sexp
    
    K
    
    Replace_polymorphic_compare
    
    Set
    
    Elt
    
    Provide_bin_io
    
    Elt
    
    Provide_hash
    
    Elt
    
    Provide_of_sexp
    
    Elt
    
    Tree
    
    Provide_of_sexp
    
    Elt
    
    Make
    
    S
    
    Map
    
    Key
    
    Provide_bin_io
    
    Key
    
    Provide_hash
    
    Key
    
    Provide_of_sexp
    
    Key
    
    Tree
    
    Provide_of_sexp
    
    K
    
    Replace_polymorphic_compare
    
    Set
    
    Elt
    
    Provide_bin_io
    
    Elt
    
    Provide_hash
    
    Elt
    
    Provide_of_sexp
    
    Elt
    
    Tree
    
    Provide_of_sexp
    
    Elt
  - Sgr
  - Solexa_score
  - Strand
  - Table
    
    Row
    
    Error
    
    Tags
    
    Transform
  - Tfxm
    
    object_t
  - Track
    
    Error
    
    Transform
  - Transcripts
  - Vcf
    
    Transform
  - Wig
    
    Error
    
    Tags
    
    Transform
  - Zip
    
    Default
    
    Error
    
    Transform

Legend:
Library
Module
Module type
Parameter
Class
Class type

include module type of struct include Biocaml_unix.Fasta end

FASTA files. The FASTA family of file formats has different incompatible descriptions (1, 2, 3, 4, etc.). Roughly, FASTA files have the following format:

    # comment
    # comment
    ...
    >description
    sequence
    >description
    sequence
    ...

Comment lines are allowed at the top of the file. Usually comments start with a '#' character, but sometimes with a ';' character. The fmt properties configure which of these comment characters is allowed during parsing and printing.

Description lines begin with the '>' character. Various conventions are used for the content but there is no requirement. We simply return the string following the '>' character.

Sequences are most often a sequence of characters denoting nucleotides or amino acids, and thus an item's sequence field is set to a string. Sequences may span multiple lines.

However, sequence lines are sometimes used to provide quality scores, either as space-separated integers or as ASCII-encoded scores. To support the former case, we provide the sequence_to_int_list function. For the latter case, see modules Phred_score and Solexa_score.

FASTA files are used to provide both short sequences and very big sequences, e.g. a genome. In the latter case, the main API of this module, which returns each sequence as an in-memory string, might be too costly. Consider instead using the read0 function, which does not merge multiple sequence lines into one string. This API is slightly more difficult to use but perhaps a worthwhile trade-off.

Some FASTA files include very large sequences on a single line. This is discouraged and not well supported by this module. Functions in this module require memory proportional to the length of a line. Thus, a whole chromosomal sequence on a single line will consume a large amount of memory. This might not be a problem given the RAM on most computers.

Format Specifiers:

Variations in the format are controlled by the following settings, all of which have a default value. These properties are combined into the fmt type for convenience and the defaults into default_fmt.

allow_sharp_comments: Allow comment lines beginning with a '#' character. Default: true.

allow_semicolon_comments: Allow comment lines beginning with a ';' character. Default: false.

Setting both allow_sharp_comments and allow_semicolon_comments to true allows both kinds of comment lines. Setting both to false disallows comment lines.

allow_empty_lines: Allow lines with only whitespace anywhere in the file. Default: false.

comments_only_at_top: Allow comments only at the top of the file. If false, comment lines can occur anywhere but only the ones at the top are returned. The rest are ignored. Default: true.

max_line_length: Require sequence lines to be shorter than the given length. None means there is no restriction. Note this does not restrict the length of an item's sequence field because a sequence can span multiple lines. Default: None.

alphabet: Require every sequence character to be one of those in the given string. None means any character is allowed. Default: None.

type header = private string list

A header is a list of comment lines.

type item = private Biocaml_unix.Fasta.item = {

description : string;
sequence : string;

}

type fmt = Biocaml_unix.Fasta.fmt = {

allow_sharp_comments : bool;
allow_semicolon_comments : bool;
allow_empty_lines : bool;
comments_only_at_top : bool;
max_line_length : int option;
alphabet : string option;

}

val default_fmt : fmt

Low-level Parsing

type item0 = private [<

| `Comment of string
| `Empty_line
| `Description of string
| `Partial_sequence of string

]

An item0 is more raw than item. It is useful for parsing files with large sequences because you get the sequence in smaller pieces.

`Comment _ - Single comment line without the final newline. Initial comment char is retained.

`Empty_line - Got a line with only whitespace characters. The contents are not provided.

`Description _ - Single description line without the initial '>' or the final newline.

`Partial_sequence _ - Multiple sequential partial sequences comprise the sequence of a single item.

val parse_item0 : 
  ?allow_sharp_comments:bool ->
  ?allow_semicolon_comments:bool ->
  ?allow_empty_lines:bool ->
  ?max_line_length:int ->
  ?alphabet:string ->
  Biocaml_unix.Line.t ->
  item0 Core_kernel.Or_error.t

val sequence_to_int_list : string -> int list

val read0 : 
  ?start:Biocaml_unix.Pos.t ->
  ?allow_sharp_comments:bool ->
  ?allow_semicolon_comments:bool ->
  ?allow_empty_lines:bool ->
  ?max_line_length:int ->
  ?alphabet:string ->
  Core_kernel.In_channel.t ->
  item0 CFStream.Stream.t

val read : 
  ?start:Biocaml_unix.Pos.t ->
  ?fmt:fmt ->
  Core_kernel.In_channel.t ->
  header * item CFStream.Stream.t

val with_file : 
  ?fmt:fmt ->
  string ->
  f:(header -> item CFStream.Stream.t -> 'a) ->
  'a