NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
kfunc.c
Go to the documentation of this file.
1 #include <math.h>
2 #include <stdlib.h>
3 #include "htslib/kfunc.h"
4 
5 /* Log gamma function
6  * \log{\Gamma(z)}
7  * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
8  */
9 double kf_lgamma(double z)
10 {
11  double x = 0;
12  x += 0.1659470187408462e-06 / (z+7);
13  x += 0.9934937113930748e-05 / (z+6);
14  x -= 0.1385710331296526 / (z+5);
15  x += 12.50734324009056 / (z+4);
16  x -= 176.6150291498386 / (z+3);
17  x += 771.3234287757674 / (z+2);
18  x -= 1259.139216722289 / (z+1);
19  x += 676.5203681218835 / z;
20  x += 0.9999999999995183;
21  return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5);
22 }
23 
24 /* complementary error function
25  * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt
26  * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66
27  */
28 double kf_erfc(double x)
29 {
30  const double p0 = 220.2068679123761;
31  const double p1 = 221.2135961699311;
32  const double p2 = 112.0792914978709;
33  const double p3 = 33.912866078383;
34  const double p4 = 6.37396220353165;
35  const double p5 = .7003830644436881;
36  const double p6 = .03526249659989109;
37  const double q0 = 440.4137358247522;
38  const double q1 = 793.8265125199484;
39  const double q2 = 637.3336333788311;
40  const double q3 = 296.5642487796737;
41  const double q4 = 86.78073220294608;
42  const double q5 = 16.06417757920695;
43  const double q6 = 1.755667163182642;
44  const double q7 = .08838834764831844;
45  double expntl, z, p;
46  z = fabs(x) * M_SQRT2;
47  if (z > 37.) return x > 0.? 0. : 2.;
48  expntl = exp(z * z * - .5);
49  if (z < 10. / M_SQRT2) // for small z
50  p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0)
51  / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0);
52  else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65)))));
53  return x > 0.? 2. * p : 2. * (1. - p);
54 }
55 
56 /* The following computes regularized incomplete gamma functions.
57  * Formulas are taken from Wiki, with additional input from Numerical
58  * Recipes in C (for modified Lentz's algorithm) and AS245
59  * (http://lib.stat.cmu.edu/apstat/245).
60  *
61  * A good online calculator is available at:
62  *
63  * http://www.danielsoper.com/statcalc/calc23.aspx
64  *
65  * It calculates upper incomplete gamma function, which equals
66  * kf_gammaq(s,z)*tgamma(s).
67  */
68 
/* Convergence threshold for the series and continued-fraction loops in this
 * file: iteration stops once the latest relative change falls below it. */
69 #define KF_GAMMA_EPS 1e-14
/* Lower bound applied to the C and D terms of the Lentz iterations in this
 * file; values below it are replaced by it. */
70 #define KF_TINY 1e-290
71 
72 // regularized lower incomplete gamma function, by series expansion
73 static double _kf_gammap(double s, double z)
74 {
75  double sum, x;
76  int k;
77  for (k = 1, sum = x = 1.; k < 100; ++k) {
78  sum += (x *= z / (s + k));
79  if (x / sum < KF_GAMMA_EPS) break;
80  }
81  return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum));
82 }
83 // regularized upper incomplete gamma function, by continued fraction
84 static double _kf_gammaq(double s, double z)
85 {
86  int j;
87  double C, D, f;
88  f = 1. + z - s; C = f; D = 0.;
89  // Modified Lentz's algorithm for computing continued fraction
90  // See Numerical Recipes in C, 2nd edition, section 5.2
91  for (j = 1; j < 100; ++j) {
92  double a = j * (s - j), b = (j<<1) + 1 + z - s, d;
93  D = b + a * D;
94  if (D < KF_TINY) D = KF_TINY;
95  C = b + a / C;
96  if (C < KF_TINY) C = KF_TINY;
97  D = 1. / D;
98  d = C * D;
99  f *= d;
100  if (fabs(d - 1.) < KF_GAMMA_EPS) break;
101  }
102  return exp(s * log(z) - z - kf_lgamma(s) - log(f));
103 }
104 
/* Regularized lower incomplete gamma function.
 * Uses the series expansion when z <= 1 or z < s, and otherwise one minus
 * the continued-fraction value of the upper function.
 */
double kf_gammap(double s, double z)
{
	if (z <= 1. || z < s)
		return _kf_gammap(s, z);
	return 1. - _kf_gammaq(s, z);
}
109 
/* Regularized upper incomplete gamma function.
 * Uses one minus the series value of the lower function when z <= 1 or
 * z < s, and the continued fraction otherwise.
 */
double kf_gammaq(double s, double z)
{
	if (z <= 1. || z < s)
		return 1. - _kf_gammap(s, z);
	return _kf_gammaq(s, z);
}
114 
115 /* Regularized incomplete beta function. The method is taken from
116  * Numerical Recipe in C, 2nd edition, section 6.4. The following web
117  * page calculates the incomplete beta function, which equals
118  * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b):
119  *
120  * http://www.danielsoper.com/statcalc/calc36.aspx
121  */
122 static double kf_betai_aux(double a, double b, double x)
123 {
124  double C, D, f;
125  int j;
126  if (x == 0.) return 0.;
127  if (x == 1.) return 1.;
128  f = 1.; C = f; D = 0.;
129  // Modified Lentz's algorithm for computing continued fraction
130  for (j = 1; j < 200; ++j) {
131  double aa, d;
132  int m = j>>1;
133  aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1))
134  : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m));
135  D = 1. + aa * D;
136  if (D < KF_TINY) D = KF_TINY;
137  C = 1. + aa / C;
138  if (C < KF_TINY) C = KF_TINY;
139  D = 1. / D;
140  d = C * D;
141  f *= d;
142  if (fabs(d - 1.) < KF_GAMMA_EPS) break;
143  }
144  return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f;
145 }
/* Regularized incomplete beta function.
 * Below the threshold (a + 1) / (a + b + 2) the continued fraction is
 * evaluated at (a, b, x); otherwise it is evaluated at (b, a, 1 - x) and
 * the result is subtracted from 1.
 */
double kf_betai(double a, double b, double x)
{
	const double threshold = (a + 1.) / (a + b + 2.);

	if (x < threshold)
		return kf_betai_aux(a, b, x);
	return 1. - kf_betai_aux(b, a, 1. - x);
}
150 
#ifdef KF_MAIN
#include <stdio.h>
/* Demo driver, compiled only when KF_MAIN is defined. It prints kf_erfc()
 * next to the C library's erfc(), then one upper incomplete gamma value and
 * one incomplete beta value. Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
	const double s = 3;
	double x = 5.5;
	double a = 2, b = 2;
	double beta_scale;

	printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x));
	printf("upper-gamma(%lg,%lg): %lg\n", x, s, kf_gammaq(s, x)*tgamma(s));

	x = 0.5;
	// gamma(a+b) / (gamma(a) * gamma(b)), used to undo the regularization
	beta_scale = exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b));
	printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / beta_scale);
	return 0;
}
#endif
164 
165 
166 // log\binom{n}{k}
167 static double lbinom(int n, int k)
168 {
169  if (k == 0 || n == k) return 0;
170  return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);
171 }
172 
// Naming of the cells and margins of the 2x2 table:
//
//   n11  n12 | n1_
//   n21  n22 | n2_
//  ----------+----
//   n_1  n_2 | n

// hypergeometric distribution:
// binom(n1_, n11) * binom(n - n1_, n_1 - n11) / binom(n, n_1),
// evaluated in log space and exponentiated at the end
static double hypergeo(int n11, int n1_, int n_1, int n)
{
	const double log_row1 = lbinom(n1_, n11);
	const double log_row2 = lbinom(n-n1_, n_1-n11);
	const double log_all = lbinom(n, n_1);

	return exp(log_row1 + log_row2 - log_all);
}
183 
// State kept between calls of the incremental hypergeometric routine
typedef struct {
	int n11;	// top-left cell of the most recent evaluation
	int n1_;	// first-row total
	int n_1;	// first-column total
	int n;		// grand total
	double p;	// probability computed for the values above
} hgacc_t;
188 
189 // incremental version of hypergenometric distribution
190 static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux)
191 {
192  if (n1_ || n_1 || n) {
193  aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n;
194  } else { // then only n11 changed; the rest fixed
195  if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) {
196  if (n11 == aux->n11 + 1) { // incremental
197  aux->p *= (double)(aux->n1_ - aux->n11) / n11
198  * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1);
199  aux->n11 = n11;
200  return aux->p;
201  }
202  if (n11 == aux->n11 - 1) { // incremental
203  aux->p *= (double)aux->n11 / (aux->n1_ - n11)
204  * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);
205  aux->n11 = n11;
206  return aux->p;
207  }
208  }
209  aux->n11 = n11;
210  }
211  aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);
212  return aux->p;
213 }
214 
215 double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two)
216 {
217  int i, j, max, min;
218  double p, q, left, right;
219  hgacc_t aux;
220  int n1_, n_1, n;
221 
222  n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n
223  max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail
224  min = n1_ + n_1 - n; // not sure why n11-n22 is used instead of min(n_1,n1_)
225  if (min < 0) min = 0; // min n11, for left tail
226  *two = *_left = *_right = 1.;
227  if (min == max) return 1.; // no need to do test
228  q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table
229  // left tail
230  p = hypergeo_acc(min, 0, 0, 0, &aux);
231  for (left = 0., i = min + 1; p < 0.99999999 * q && i<=max; ++i) // loop until underflow
232  left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);
233  --i;
234  if (p < 1.00000001 * q) left += p;
235  else --i;
236  // right tail
237  p = hypergeo_acc(max, 0, 0, 0, &aux);
238  for (right = 0., j = max - 1; p < 0.99999999 * q && j>=0; --j) // loop until underflow
239  right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);
240  ++j;
241  if (p < 1.00000001 * q) right += p;
242  else ++j;
243  // two-tail
244  *two = left + right;
245  if (*two > 1.) *two = 1.;
246  // adjust left and right
247  if (abs(i - n11) < abs(j - n11)) right = 1. - left + q;
248  else left = 1.0 - right + q;
249  *_left = left; *_right = right;
250  return q;
251 }
252 
253 
254