package phylogenetics
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>
Algorithms and datastructures for phylogenetics
Install
dune-project
Dependency
Authors
Maintainers
Sources
phylogenetics-0.3.0.tbz
sha256=de867d7cc017a8e434dab43ef16f0f6495973892cd7b6a8446b18e79393704a8
sha512=0209538caf94be47eabcaa25399c54849bd4fa0fc79e0579acee27f46ef3b72aa50e17bdb48fed8e86674d4caee6c1c4c423833a2757db12e2a6cc28234510de
doc/src/phylogenetics/codon.ml.html
Source file codon.ml
open Base

(** Common interface of codon alphabets: an integer-indexed alphabet
    (see [Alphabet.S_int]) whose letters are nucleotide triplets. *)
module type S = sig
  include Alphabet.S_int

  (** [to_string c] is the triplet of [c], e.g. [TTT]. *)
  val to_string : t -> string

  (** [of_string s] is the codon whose triplet is exactly [s], if any. *)
  val of_string : string -> t option

  (** [neighbours p q] is [Some (pos, np, nq)] when the triplets of [p] and
      [q] differ at exactly one position [pos] (0, 1 or 2), [np] and [nq]
      being the nucleotides found at that position in [p] and [q]
      respectively. It is [None] in every other case, including [p = q]. *)
  val neighbours : t -> t -> (int * Nucleotide.t * Nucleotide.t) option

  (** [nucleotides c] is the three nucleotides of [c], in reading order. *)
  val nucleotides : t -> Nucleotide.t * Nucleotide.t * Nucleotide.t
end

(** Builds a codon alphabet over [X.triplets]; a codon is represented by the
    position of its triplet in that array. Every triplet is indexed at
    positions 0, 1 and 2 by [neighbours] and [nucleotides], so it is expected
    to hold at least three characters. *)
module Impl (X : sig
  val triplets : string array
end) =
struct
  include Alphabet.Make (struct
    let card = Array.length X.triplets
  end)

  (* Per-codon array of decoded nucleotides. This binding is shadowed further
     down by the tuple-returning function of the same name. *)
  let nucleotides =
    Array.map X.triplets ~f:(fun triplet ->
        String.to_array triplet
        |> Array.map ~f:(fun base -> Nucleotide.of_char_exn base))

  (* Reverse index from triplet string to codon; [of_alist_exn] raises if the
     same triplet appears twice in [X.triplets]. *)
  let hash_table =
    Array.mapi X.triplets ~f:(fun index triplet -> (triplet, index))
    |> Array.to_list
    |> Core.String.Table.of_alist_exn

  let to_string codon = X.triplets.(codon)

  let of_string triplet = Hashtbl.find hash_table triplet

  let neighbours p q =
    let p_s = X.triplets.(p) and q_s = X.triplets.(q) in
    let differs pos = Char.(p_s.[pos] <> q_s.[pos]) in
    (* Two codons are neighbours when exactly one position differs. *)
    match List.filter [ 0; 1; 2 ] ~f:differs with
    | [ pos ] -> Some (pos, nucleotides.(p).(pos), nucleotides.(q).(pos))
    | _ -> None

  let%test _ = Option.is_none (neighbours 0 5)

  let nucleotides codon =
    let bases = nucleotides.(codon) in
    (bases.(0), bases.(1), bases.(2))

  (** Identity. This is only a meaningful conversion when [X.triplets] is
      [all_triplets] itself; alphabets built on a subset have to override
      it. *)
  let to_codon x = x
end

(** The 64 triplets, in the order used by the NCBI translation tables: each
    position cycles through T, C, A, G, the last position varying fastest. *)
let all_triplets =
  [| "TTT"; "TTC"; "TTA"; "TTG"
   ; "TCT"; "TCC"; "TCA"; "TCG"
   ; "TAT"; "TAC"; "TAA"; "TAG"
   ; "TGT"; "TGC"; "TGA"; "TGG"
   ; "CTT"; "CTC"; "CTA"; "CTG"
   ; "CCT"; "CCC"; "CCA"; "CCG"
   ; "CAT"; "CAC"; "CAA"; "CAG"
   ; "CGT"; "CGC"; "CGA"; "CGG"
   ; "ATT"; "ATC"; "ATA"; "ATG"
   ; "ACT"; "ACC"; "ACA"; "ACG"
   ; "AAT"; "AAC"; "AAA"; "AAG"
   ; "AGT"; "AGC"; "AGA"; "AGG"
   ; "GTT"; "GTC"; "GTA"; "GTG"
   ; "GCT"; "GCC"; "GCA"; "GCG"
   ; "GAT"; "GAC"; "GAA"; "GAG"
   ; "GGT"; "GGC"; "GGA"; "GGG"
  |]

(** [decode_ncbi_string s] maps every character of [s] to the integer of the
    amino acid it denotes, or to [-1] when [Amino_acid.of_char] does not
    recognise it (the translation tables below use a star for stops). *)
let decode_ncbi_string s =
  let residue_index ch =
    Option.value_map (Amino_acid.of_char ch) ~default:(-1)
      ~f:Amino_acid.to_int
  in
  String.to_array s |> Array.map ~f:residue_index

(** A translation table: NCBI table number, human-readable label, and one
    character per triplet of [all_triplets]. *)
type genetic_code = int * string * string

(* adapted from https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c *)
let genetic_codes =
  [ ( 1
    , "Standard"
    , "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 2
    , "Vertebrate Mitochondrial"
    , "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG" )
  ; ( 3
    , "Yeast Mitochondrial"
    , "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 4
    , "Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma"
    , "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 5
    , "Invertebrate Mitochondrial"
    , "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG" )
  ; ( 6
    , "Ciliate, Dasycladacean and Hexamita Nuclear"
    , "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 9
    , "Echinoderm and Flatworm Mitochondrial"
    , "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG" )
  ; ( 10
    , "Euplotid Nuclear"
    , "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 11
    , "Bacterial, Archaeal and Plant Plastid"
    , "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 12
    , "Alternative Yeast Nuclear"
    , "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 13
    , "Ascidian Mitochondrial"
    , "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG" )
  ; ( 14
    , "Alternative Flatworm Mitochondrial"
    , "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG" )
  ; ( 16
    , "Chlorophycean Mitochondrial"
    , "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 21
    , "Trematode Mitochondrial"
    , "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG" )
  ; ( 22
    , "Scenedesmus obliquus Mitochondrial"
    , "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 23
    , "Thraustochytrium Mitochondrial"
    , "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 24
    , "Rhabdopleuridae Mitochondrial"
    , "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG" )
  ; ( 25
    , "Candidate Division SR1 and Gracilibacteria"
    , "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 26
    , "Pachysolen tannophilus Nuclear"
    , "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 27
    , "Karyorelict Nuclear"
    , "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 28
    , "Condylostoma Nuclear"
    , "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 29
    , "Mesodinium Nuclear"
    , "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 30
    , "Peritrich Nuclear"
    , "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 31
    , "Blastocrithidia Nuclear"
    , "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" )
  ; ( 33
    , "Cephalodiscidae Mitochondrial UAA-Tyr"
    , "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG" )
  ]

(** Table number of a genetic code. *)
let transl_table (number, _, _) = number

(** Human-readable label of a genetic code. *)
let label_of_genetic_code (_, label, _) = label

(* The top level of this file is itself the codon alphabet over all 64
   triplets. *)
include Impl (struct
  let triplets = all_triplets
end)

(** A genetic code: a translation of the 64 codons, plus the sub-alphabet
    [NS] made of the codons that translate to an amino acid. *)
module type Genetic_code = sig
  type codon = t

  val stop_codons : codon list
  val is_stop_codon : codon -> bool

  (** [None] for codons without an amino acid under this code. *)
  val aa_of_codon : codon -> Amino_acid.t option

  (** [synonym p q] holds when [p] and [q] have the same translation. *)
  val synonym : codon -> codon -> bool

  module NS : sig
    include S

    (** The codon of the 64-letter alphabet carrying the same triplet. *)
    val to_codon : t -> codon

    val aa_of_codon : t -> Amino_acid.t
    val synonym : t -> t -> bool

    (** @raise Invalid_argument when the integer is out of range. *)
    val of_int_exn : int -> t
  end
end

(** Implements {!Genetic_code} from [X.code_array], where [X.code_array.(i)]
    is the amino-acid integer assigned to [all_triplets.(i)], or a negative
    value when that triplet has no amino acid (see [decode_ncbi_string]). *)
module Genetic_code_impl (X : sig
  val code_array : int array
end) =
struct
  type codon = t

  (* NOTE(review): relies on [Amino_acid.of_int] returning [None] for the
     [-1] sentinel - confirm against Amino_acid. *)
  let code = Array.map X.code_array ~f:Amino_acid.of_int

  let aa_of_codon codon = code.(codon)

  (* Array form, used by [is_stop_codon]; re-exported as a list at the end of
     the module. *)
  let stop_codons =
    Array.filter_mapi code ~f:(fun index aa ->
        match aa with
        | None -> Some index
        | Some _ -> None)

  let is_stop_codon c = Array.mem stop_codons c ~equal:Int.equal

  let synonym p q = Poly.equal code.(p) code.(q)

  (** Alphabet restricted to the triplets whose [code_array] entry is
      non-negative (presumably NS stands for non-stop). Its letters are
      positions in that restricted triplet array, not in [all_triplets]. *)
  module NS = struct
    let triplets =
      Array.filteri all_triplets ~f:(fun index _ -> X.code_array.(index) >= 0)

    let code_array = Array.filter X.code_array ~f:(fun aa -> aa >= 0)

    let code = Array.map code_array ~f:Amino_acid.of_int_exn

    include Impl (struct
      let triplets = triplets
    end)

    (** [to_codon c] is the position in [all_triplets] of the triplet of
        [c]. The identity inherited from [Impl] is wrong here: every [NS]
        index located after a removed triplet is shifted with respect to the
        64-codon numbering, so the mapping goes through the table of kept
        positions instead. *)
    let to_codon =
      let kept_positions =
        Array.filter_mapi X.code_array ~f:(fun index aa ->
            if aa >= 0 then Some index else None)
      in
      fun c -> kept_positions.(c)

    let aa_of_codon c = code.(c)

    let synonym p q = Poly.equal code.(p) code.(q)

    let of_int_exn i =
      if 0 <= i && i < card then i
      else raise (Invalid_argument "Codon.Genetic_code_impl.NS.of_int_exn")
  end

  let stop_codons = Array.to_list stop_codons
end

(** [genetic_code_impl gc] packs the implementation of the translation table
    [gc] as a first-class module. *)
let genetic_code_impl (_, _, ncbi_string) =
  let module Code = Genetic_code_impl (struct
    let code_array = decode_ncbi_string ncbi_string
  end) in
  (module Code : Genetic_code)

(** The first entry of [genetic_codes], i.e. NCBI table 1 (Standard). *)
module Universal_genetic_code = struct
  let m = genetic_code_impl (List.hd_exn genetic_codes)

  include (val m)

  (* probably useless optimization: 10, 11 and 14 are the positions of TAA,
     TAG and TGA in [all_triplets] *)
  let is_stop_codon c =
    match c with
    | 10 | 11 | 14 -> true
    | _ -> false
end

let%test "universal code" =
  let residue_char codon =
    match Universal_genetic_code.aa_of_codon codon with
    | Some aa -> Amino_acid.to_char aa
    | None -> '*'
  in
  let translation =
    Array.to_list all_triplets
    |> List.map ~f:of_string
    |> Option.all
    |> Option.value_exn
    |> List.map ~f:residue_char
    |> String.of_char_list
  in
  String.equal translation
    "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"

(* Under the standard code the last of the 61 non-stop codons is GGG, which is
   codon 63 of the full alphabet: the triplet must survive the conversion. *)
let%test "NS.to_codon preserves the triplet" =
  let module U = Universal_genetic_code in
  let ns_codon = U.NS.of_int_exn 60 in
  String.equal (U.NS.to_string ns_codon) (to_string (U.NS.to_codon ns_codon))
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>