NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ksort.h
Go to the documentation of this file.
1 /* The MIT License
2 
3  Copyright (c) 2008 Genome Research Ltd (GRL).
4 
5  Permission is hereby granted, free of charge, to any person obtaining
6  a copy of this software and associated documentation files (the
7  "Software"), to deal in the Software without restriction, including
8  without limitation the rights to use, copy, modify, merge, publish,
9  distribute, sublicense, and/or sell copies of the Software, and to
10  permit persons to whom the Software is furnished to do so, subject to
11  the following conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  SOFTWARE.
24 */
25 
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
27 
28 /*
29  2012-12-11 (0.1.4):
30 
31  * Defined __ks_insertsort_##name as static to compile with C99.
32 
33  2008-11-16 (0.1.4):
34 
35  * Fixed a bug in introsort() that happens in rare cases.
36 
37  2008-11-05 (0.1.3):
38 
39  * Fixed a bug in introsort() for complex comparisons.
40 
41  * Fixed a bug in mergesort(). The previous version is not stable.
42 
43  2008-09-15 (0.1.2):
44 
45  * Accelerated introsort. On my Mac (not on another Linux machine),
46  my implementation is as fast as std::sort on random input.
47 
48  * Added combsort and in introsort, switch to combsort if the
49  recursion is too deep.
50 
51  2008-09-13 (0.1.1):
52 
53  * Added k-small algorithm
54 
55  2008-09-05 (0.1.0):
56 
57  * Initial version
58 
59 */
60 
61 #ifndef AC_KSORT_H
62 #define AC_KSORT_H
63 
64 #include <stdlib.h>
65 #include <string.h>
66 
67 typedef struct {
68  void *left, *right;
69  int depth;
71 
72 #define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
73 
74 #define KSORT_INIT(name, type_t, __sort_lt) \
75  void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) \
76  { \
77  type_t *a2[2], *a, *b; \
78  int curr, shift; \
79  \
80  a2[0] = array; \
81  a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \
82  for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) { \
83  a = a2[curr]; b = a2[1-curr]; \
84  if (shift == 0) { \
85  type_t *p = b, *i, *eb = a + n; \
86  for (i = a; i < eb; i += 2) { \
87  if (i == eb - 1) *p++ = *i; \
88  else { \
89  if (__sort_lt(*(i+1), *i)) { \
90  *p++ = *(i+1); *p++ = *i; \
91  } else { \
92  *p++ = *i; *p++ = *(i+1); \
93  } \
94  } \
95  } \
96  } else { \
97  size_t i, step = 1ul<<shift; \
98  for (i = 0; i < n; i += step<<1) { \
99  type_t *p, *j, *k, *ea, *eb; \
100  if (n < i + step) { \
101  ea = a + n; eb = a; \
102  } else { \
103  ea = a + i + step; \
104  eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
105  } \
106  j = a + i; k = a + i + step; p = b + i; \
107  while (j < ea && k < eb) { \
108  if (__sort_lt(*k, *j)) *p++ = *k++; \
109  else *p++ = *j++; \
110  } \
111  while (j < ea) *p++ = *j++; \
112  while (k < eb) *p++ = *k++; \
113  } \
114  } \
115  curr = 1 - curr; \
116  } \
117  if (curr == 1) { \
118  type_t *p = a2[0], *i = a2[1], *eb = array + n; \
119  for (; p < eb; ++i) *p++ = *i; \
120  } \
121  if (temp == 0) free(a2[1]); \
122  } \
123  void ks_heapadjust_##name(size_t i, size_t n, type_t l[]) \
124  { \
125  size_t k = i; \
126  type_t tmp = l[i]; \
127  while ((k = (k << 1) + 1) < n) { \
128  if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \
129  if (__sort_lt(l[k], tmp)) break; \
130  l[i] = l[k]; i = k; \
131  } \
132  l[i] = tmp; \
133  } \
134  void ks_heapmake_##name(size_t lsize, type_t l[]) \
135  { \
136  size_t i; \
137  for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \
138  ks_heapadjust_##name(i, lsize, l); \
139  } \
140  void ks_heapsort_##name(size_t lsize, type_t l[]) \
141  { \
142  size_t i; \
143  for (i = lsize - 1; i > 0; --i) { \
144  type_t tmp; \
145  tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \
146  } \
147  } \
148  static inline void __ks_insertsort_##name(type_t *s, type_t *t) \
149  { \
150  type_t *i, *j, swap_tmp; \
151  for (i = s + 1; i < t; ++i) \
152  for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \
153  swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \
154  } \
155  } \
156  void ks_combsort_##name(size_t n, type_t a[]) \
157  { \
158  const double shrink_factor = 1.2473309501039786540366528676643; \
159  int do_swap; \
160  size_t gap = n; \
161  type_t tmp, *i, *j; \
162  do { \
163  if (gap > 2) { \
164  gap = (size_t)(gap / shrink_factor); \
165  if (gap == 9 || gap == 10) gap = 11; \
166  } \
167  do_swap = 0; \
168  for (i = a; i < a + n - gap; ++i) { \
169  j = i + gap; \
170  if (__sort_lt(*j, *i)) { \
171  tmp = *i; *i = *j; *j = tmp; \
172  do_swap = 1; \
173  } \
174  } \
175  } while (do_swap || gap > 2); \
176  if (gap != 1) __ks_insertsort_##name(a, a + n); \
177  } \
178  void ks_introsort_##name(size_t n, type_t a[]) \
179  { \
180  int d; \
181  ks_isort_stack_t *top, *stack; \
182  type_t rp, swap_tmp; \
183  type_t *s, *t, *i, *j, *k; \
184  \
185  if (n < 1) return; \
186  else if (n == 2) { \
187  if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
188  return; \
189  } \
190  for (d = 2; 1ul<<d < n; ++d); \
191  stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
192  top = stack; s = a; t = a + (n-1); d <<= 1; \
193  while (1) { \
194  if (s < t) { \
195  if (--d == 0) { \
196  ks_combsort_##name(t - s + 1, s); \
197  t = s; \
198  continue; \
199  } \
200  i = s; j = t; k = i + ((j-i)>>1) + 1; \
201  if (__sort_lt(*k, *i)) { \
202  if (__sort_lt(*k, *j)) k = j; \
203  } else k = __sort_lt(*j, *i)? i : j; \
204  rp = *k; \
205  if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \
206  for (;;) { \
207  do ++i; while (__sort_lt(*i, rp)); \
208  do --j; while (i <= j && __sort_lt(rp, *j)); \
209  if (j <= i) break; \
210  swap_tmp = *i; *i = *j; *j = swap_tmp; \
211  } \
212  swap_tmp = *i; *i = *t; *t = swap_tmp; \
213  if (i-s > t-i) { \
214  if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
215  s = t-i > 16? i+1 : t; \
216  } else { \
217  if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
218  t = i-s > 16? i-1 : s; \
219  } \
220  } else { \
221  if (top == stack) { \
222  free(stack); \
223  __ks_insertsort_##name(a, a+n); \
224  return; \
225  } else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
226  } \
227  } \
228  } \
229  /* This function is adapted from: http://ndevilla.free.fr/median/ */ \
230  /* 0 <= kk < n */ \
231  type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \
232  { \
233  type_t *low, *high, *k, *ll, *hh, *mid; \
234  low = arr; high = arr + n - 1; k = arr + kk; \
235  for (;;) { \
236  if (high <= low) return *k; \
237  if (high == low + 1) { \
238  if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
239  return *k; \
240  } \
241  mid = low + (high - low) / 2; \
242  if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
243  if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
244  if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \
245  KSORT_SWAP(type_t, *mid, *(low+1)); \
246  ll = low + 1; hh = high; \
247  for (;;) { \
248  do ++ll; while (__sort_lt(*ll, *low)); \
249  do --hh; while (__sort_lt(*low, *hh)); \
250  if (hh < ll) break; \
251  KSORT_SWAP(type_t, *ll, *hh); \
252  } \
253  KSORT_SWAP(type_t, *low, *hh); \
254  if (hh <= k) low = ll; \
255  if (hh >= k) high = hh - 1; \
256  } \
257  } \
258  void ks_shuffle_##name(size_t n, type_t a[]) \
259  { \
260  int i, j; \
261  for (i = n; i > 1; --i) { \
262  type_t tmp; \
263  j = (int)(drand48() * i); \
264  tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; \
265  } \
266  }
267 
268 #define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)
269 #define ks_introsort(name, n, a) ks_introsort_##name(n, a)
270 #define ks_combsort(name, n, a) ks_combsort_##name(n, a)
271 #define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)
272 #define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)
273 #define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)
274 #define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
275 #define ks_shuffle(name, n, a) ks_shuffle_##name(n, a)
276 
277 #define ks_lt_generic(a, b) ((a) < (b))
278 #define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
279 
280 typedef const char *ksstr_t;
281 
282 #define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
283 #define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
284 
285 #endif