package phylogenetics

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file rate_matrix.ml

open Core
open Linear_algebra

(** Signature of rate-matrix operations over an alphabet of symbols.
    A rate matrix [t] is represented directly as a [matrix]. The [Make]
    functor in this file defines a value for each name listed here; the
    descriptions below summarise what those definitions do. *)
module type S = sig
  type vector
  type matrix
  type symbol
  type t = matrix

  (** [make f] builds a rate matrix from [f]. In [Make], [f i j] supplies
      the entry for every ordered pair of distinct symbols, and each
      diagonal entry is set to minus the sum of the other entries of its
      row.
      @raise Failure if [f] returns a negative value. *)
  val make : (symbol -> symbol -> float) -> t

  (** [make_symetric f] builds a symmetric matrix from [f]. In [Make],
      [f i j] is evaluated only for pairs whose first symbol has the
      larger integer code, and the value is stored at both [(i, j)] and
      [(j, i)].
      @raise Failure if [f] returns a negative value. *)
  val make_symetric : (symbol -> symbol -> float) -> t

  (** [jc69 ()] is the matrix with [-1.] on the diagonal and the same
      rate, [1 / (card - 1)], everywhere else (as defined in [Make]). *)
  val jc69 : unit -> t

  (** [gtr ~stationary_distribution ~exchangeabilities] builds a matrix
      whose off-diagonal entry [(i, j)] is the exchangeability of
      [(i, j)] multiplied by the stationary frequency of [j], then passes
      it through [scaled_rate_matrix] with the same distribution (as
      defined in [Make]). *)
  val gtr :
    stationary_distribution:vector ->
    exchangeabilities:matrix ->
    t

  (** [stationary_distribution m] returns a vector derived from [m]; in
      [Make] it is computed with [Matrix.zero_eigen_vector]. *)
  val stationary_distribution : t -> vector

  (** [scaled_rate_matrix profile rate] divides the off-diagonal entries
      of [rate] by [mu], where [mu] is minus the sum over symbols [i] of
      [profile(i)] times the diagonal entry [rate(i, i)], and rebuilds
      the diagonal from the scaled rows (as defined in [Make]). *)
  val scaled_rate_matrix : vector -> t -> t

  (** [scale rate] divides every entry of [rate] by the sum of all its
      off-diagonal entries (as defined in [Make]). *)
  val scale : t -> t
end

(** [Make (A)] provides rate-matrix constructors and helpers for the
    alphabet [A]. A rate matrix is an [A.matrix], indexed by the symbols
    of [A]. *)
module Make(A : Alphabet.S_int) = struct
  type t = A.matrix

  (** [sum f] folds over [A.all] and returns the sum of [f x] for every
      symbol [x], starting from [0.]. *)
  let sum f =
    List.fold A.all ~init:0. ~f:(fun acc n -> acc +. f n)

  (** [stationary_distribution m] applies [Matrix.zero_eigen_vector] to
      [m] and converts the result to an [A] vector with
      [A.Vector.upcast_exn].
      NOTE(review): [upcast_exn] presumably raises when the vector does
      not fit the alphabet; confirm against [Alphabet]. *)
  let stationary_distribution (m : A.matrix) =
    Matrix.zero_eigen_vector (m :> mat)
    |> A.Vector.upcast_exn

  (** [jc69 ()] is the matrix with [-1.] on the diagonal and
      [1 / (A.card - 1)] everywhere else. *)
  let jc69 () =
    let r = Float.(1. / (of_int A.card - 1.)) in
    A.Matrix.init (fun i j -> if A.equal i j then -1. else r)

  (** [make f] builds a rate matrix whose entry [(i, j)], for distinct
      symbols [i] and [j], is [f i j]. Each diagonal entry is minus the
      sum of the off-diagonal entries of its row. [f] is never called
      with two equal symbols.
      @raise Failure if [f] returns a negative rate. *)
  let make f =
    let r = A.Matrix.init (fun _ _ -> 0.) in
    List.iter A.all ~f:(fun i->
        let total = ref 0. in
        List.iter A.all ~f:(fun j ->
            if not (A.equal i j) then (
              let r_ij = f i j in
              if Float.(r_ij < 0.) then (failwith "Rates should be positive") ;
              total := r_ij +. !total ;
              A.(r.%{i, j} <- r_ij)
            )
          ) ;
        A.(r.%{i, i} <- -. !total)
      ) ;
    r

  (** [make_symetric f] builds a symmetric rate matrix. [f i j] is called
      once per unordered pair of distinct symbols, with [i] the symbol of
      larger integer code, and its value is stored at both [(i, j)] and
      [(j, i)]. Each diagonal entry is minus the sum of the off-diagonal
      entries of its row.
      @raise Failure if [f] returns a negative rate. *)
  let make_symetric f =
    let r = A.Matrix.init (fun _ _ -> 0.) in
    (* First pass: fill both triangles. The diagonal cannot be computed
       inside this loop, because the entries (i, j) with j > i are only
       written during later iterations of the outer loop. *)
    List.iter A.all ~f:(fun i ->
        List.iter A.all ~f:(fun j ->
            if (i :> int) > (j :> int) then (
              let r_ij = f i j in
              if Float.(r_ij < 0.) then (failwith "Rates should be positive") ;
              A.(r.%{i, j} <- r_ij) ;
              A.(r.%{j, i} <- r_ij)
            )
          )
      ) ;
    (* Second pass: every off-diagonal entry is now known, so each row
       can be completed with its diagonal. *)
    List.iter A.all ~f:(fun i ->
        let total =
          sum (fun j -> if A.equal i j then 0. else r.A.%{i, j})
        in
        A.(r.%{i, i} <- -. total)
      ) ;
    r

  (** [scaled_rate_matrix profile rate] computes [mu], minus the sum over
      symbols [i] of [profile(i)] times the diagonal entry [rate(i, i)],
      and returns [make] applied to the off-diagonal entries of [rate]
      divided by [mu]. When the rows of [rate] sum to zero, the result
      has a [profile]-weighted sum of diagonal entries equal to [-1].
      @raise Failure (through [make]) if a scaled entry is negative. *)
  let scaled_rate_matrix profile rate =
    let mu = -. sum Float.(fun i ->
        profile.A.%(i) * rate.A.%{i, i}
      )
    in
    make Float.(fun i j -> rate.A.%{i, j} / mu)

  (** [scale rate] divides every entry of [rate], diagonal included, by
      the sum of all its off-diagonal entries. *)
  let scale rate =
    let mu =
      sum (fun i ->
          sum (fun j ->
              if A.equal i j then 0. else rate.A.%{i, j}
            )
        )
    in
    A.Matrix.init Float.(fun i j -> rate.A.%{i, j} / mu)

  (** [ut_index i j] is the rank of the pair [(i, j)], with [i < j], when
      the strict upper triangle of an [A.card]-sized square is listed row
      by row. The inline test below checks that these ranks enumerate
      [0 .. A.card * (A.card - 1) / 2 - 1] in order. *)
  let ut_index i j =
    let n = A.card in
    n * (n - 1) / 2 - (n - i) * (n - i -1) / 2 + j - i - 1

  let%test "upper triangular indexation" =
    Poly.equal
      (
        List.init A.card ~f:(fun i ->
            List.init (A.card - i - 1) ~f:(fun j ->
                let j = i + j + 1 in
                ut_index i j
              )
          )
        |> List.concat
      )
      (List.init (A.card * (A.card - 1) / 2) ~f:Fn.id)

  (** [gtr ~stationary_distribution ~exchangeabilities] builds, with
      [make], the matrix whose off-diagonal entry [(i, j)] is the
      exchangeability of [(i, j)] multiplied by the stationary frequency
      of [j], then rescales it with [scaled_rate_matrix] using the same
      distribution. Only off-diagonal exchangeabilities are read. *)
  let gtr ~stationary_distribution ~exchangeabilities =
    let m = make (fun i j ->
        A.Matrix.get exchangeabilities (i :> int) (j :> int) *. stationary_distribution.A.%(j)
      ) in
    scaled_rate_matrix stationary_distribution m

  (* Checks that the stationary distribution recovered from a GTR matrix
     matches the profile it was built from. *)
  let%test "gtr stationary distribution" =
    let rng = Utils.rng_of_int 12334 in
    let pi = A.random_profile rng 10. in
    let exchangeabilities = make_symetric (fun _ _ -> Gsl.Randist.gamma rng ~a:1. ~b:1.) in
    let gtr_rates = gtr ~stationary_distribution:pi ~exchangeabilities in
    let pi' = stationary_distribution gtr_rates in
    Vector.robust_equal ~tol:1e-6 (pi :> vec) (pi' :> vec)
end

(** [make n ~f] builds an [n] by [n] rate matrix indexed by integers.
    For [i <> j] the entry [(i, j)] is [f i j]; each diagonal entry is
    minus the sum of the off-diagonal entries of its row. [f] is never
    called with [i = j].
    @raise Failure if [f] returns a negative rate. *)
let make n ~f =
  let rates = Matrix.init n ~f:(fun _ _ -> 0.) in
  for row = 0 to n - 1 do
    (* Running sum of the off-diagonal rates of the current row. *)
    let row_sum = ref 0. in
    for col = 0 to n - 1 do
      if row <> col then begin
        let rate = f row col in
        if Float.(rate < 0.) then failwith "Rates should be positive" ;
        Matrix.set rates row col rate ;
        row_sum := rate +. !row_sum
      end
    done ;
    Matrix.set rates row row (-. !row_sum)
  done ;
  rates

(** [transition_probability_matrix ~tau ~rates] converts the array of
    arrays [rates] to a matrix, multiplies it by the scalar [tau],
    applies [Matrix.expm] (presumably the matrix exponential; confirm in
    [Linear_algebra]) and returns the result as an array of arrays.
    NOTE(review): [Matrix.of_arrays_exn] presumably raises on ragged
    input; confirm. *)
let transition_probability_matrix ~tau ~rates =
  let scaled = Matrix.scal_mul tau (Matrix.of_arrays_exn rates) in
  let probabilities = Matrix.expm scaled in
  Lacaml.D.Mat.to_array (probabilities :> Lacaml.D.mat)

(** Rate matrices over the nucleotide alphabet: everything produced by
    [Make(Nucleotide)], plus the [k80] and [hky85] constructors. *)
module Nucleotide = struct
  include Make(Nucleotide)

  (** [k80 kappa] is the matrix with [-1.] on the diagonal,
      [1 / (kappa + 2)] for pairs where [Nucleotide.transversion] holds
      and [kappa / (kappa + 2)] for the remaining off-diagonal pairs. *)
  let k80 kappa =
    Nucleotide.Matrix.init (fun i j ->
        if Nucleotide.equal i j then -1.
        else if Nucleotide.transversion i j then 1. /. (kappa +. 2.)
        else kappa /. (kappa +. 2.)
      )

  (** [hky85 ~stationary_distribution ~transition_rate ~transversion_rate]
      builds, with [make], the matrix whose off-diagonal entry [(i, j)]
      is the stationary frequency of [j] multiplied by
      [transversion_rate] when [Nucleotide.transversion i j] holds and by
      [transition_rate] otherwise, then rescales it with
      [scaled_rate_matrix] using the same distribution.
      @raise Failure (through [make]) if an entry is negative. *)
  let hky85
      ~(stationary_distribution : Nucleotide.vector)
      ~transition_rate ~transversion_rate =
    (* [make] only queries pairs of distinct symbols and fills the
       diagonal itself, so no case for equal symbols is needed here. *)
    let m = make (fun i j ->
        let coef =
          if Nucleotide.transversion i j
          then transversion_rate
          else transition_rate
        in
        coef *. stationary_distribution.Nucleotide.%(j)
      ) in
    scaled_rate_matrix stationary_distribution m

  (* Checks that the stationary distribution recovered from an HKY85
     matrix matches the profile it was built from. *)
  let%test "HKY85 stationary distribution" =
    let rng = Utils.rng_of_int 420 in
    let pi = Nucleotide.random_profile rng 10. in
    let transition_rate = Gsl.Rng.uniform rng
    and transversion_rate = Gsl.Rng.uniform rng in
    let hky_rates = hky85 ~stationary_distribution:pi ~transition_rate ~transversion_rate
    in
    let pi' = stationary_distribution hky_rates in
    Vector.robust_equal ~tol:1e-6 (pi :> vec) (pi' :> vec)
end

(** Rate matrices over the amino-acid alphabet, obtained by applying
    [Make] to [Amino_acid]; no model-specific constructors are added. *)
module Amino_acid = struct
  include Make(Amino_acid)
end