NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
divsufsort.c
Go to the documentation of this file.
1 /*
2  * divsufsort.c for libdivsufsort-lite
3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include <assert.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #ifdef _OPENMP
31 # include <omp.h>
32 #endif
33 #include "divsufsort.h"
34 
35 
36 /*- Constants -*/
37 #define INLINE __inline
38 #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
39 # undef ALPHABET_SIZE
40 #endif
41 #if !defined(ALPHABET_SIZE)
42 # define ALPHABET_SIZE (256)
43 #endif
44 #define BUCKET_A_SIZE (ALPHABET_SIZE)
45 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
46 #if defined(SS_INSERTIONSORT_THRESHOLD)
47 # if SS_INSERTIONSORT_THRESHOLD < 1
48 # undef SS_INSERTIONSORT_THRESHOLD
49 # define SS_INSERTIONSORT_THRESHOLD (1)
50 # endif
51 #else
52 # define SS_INSERTIONSORT_THRESHOLD (8)
53 #endif
54 #if defined(SS_BLOCKSIZE)
55 # if SS_BLOCKSIZE < 0
56 # undef SS_BLOCKSIZE
57 # define SS_BLOCKSIZE (0)
58 # elif 32768 <= SS_BLOCKSIZE
59 # undef SS_BLOCKSIZE
60 # define SS_BLOCKSIZE (32767)
61 # endif
62 #else
63 # define SS_BLOCKSIZE (1024)
64 #endif
65 /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
66 #if SS_BLOCKSIZE == 0
67 # define SS_MISORT_STACKSIZE (96)
68 #elif SS_BLOCKSIZE <= 4096
69 # define SS_MISORT_STACKSIZE (16)
70 #else
71 # define SS_MISORT_STACKSIZE (24)
72 #endif
73 #define SS_SMERGE_STACKSIZE (32)
74 #define TR_INSERTIONSORT_THRESHOLD (8)
75 #define TR_STACKSIZE (64)
76 
77 
78 /*- Macros -*/
79 #ifndef SWAP
80 # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
81 #endif /* SWAP */
82 #ifndef MIN
83 # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
84 #endif /* MIN */
85 #ifndef MAX
86 # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
87 #endif /* MAX */
88 #define STACK_PUSH(_a, _b, _c, _d)\
89  do {\
90  assert(ssize < STACK_SIZE);\
91  stack[ssize].a = (_a), stack[ssize].b = (_b),\
92  stack[ssize].c = (_c), stack[ssize++].d = (_d);\
93  } while(0)
94 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
95  do {\
96  assert(ssize < STACK_SIZE);\
97  stack[ssize].a = (_a), stack[ssize].b = (_b),\
98  stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
99  } while(0)
100 #define STACK_POP(_a, _b, _c, _d)\
101  do {\
102  assert(0 <= ssize);\
103  if(ssize == 0) { return; }\
104  (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
105  (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
106  } while(0)
107 #define STACK_POP5(_a, _b, _c, _d, _e)\
108  do {\
109  assert(0 <= ssize);\
110  if(ssize == 0) { return; }\
111  (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
112  (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
113  } while(0)
114 #define BUCKET_A(_c0) bucket_A[(_c0)]
115 #if ALPHABET_SIZE == 256
116 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
117 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
118 #else
119 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
120 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
121 #endif
122 
123 
124 /*- Private Functions -*/
125 
126 static const int lg_table[256]= {
127  -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
128  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
129  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
130  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
131  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
132  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
133  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
134  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
135 };
136 
137 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
138 
139 static INLINE
140 int
141 ss_ilg(int n) {
142 #if SS_BLOCKSIZE == 0
143  return (n & 0xffff0000) ?
144  ((n & 0xff000000) ?
145  24 + lg_table[(n >> 24) & 0xff] :
146  16 + lg_table[(n >> 16) & 0xff]) :
147  ((n & 0x0000ff00) ?
148  8 + lg_table[(n >> 8) & 0xff] :
149  0 + lg_table[(n >> 0) & 0xff]);
150 #elif SS_BLOCKSIZE < 256
151  return lg_table[n];
152 #else
153  return (n & 0xff00) ?
154  8 + lg_table[(n >> 8) & 0xff] :
155  0 + lg_table[(n >> 0) & 0xff];
156 #endif
157 }
158 
159 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
160 
161 #if SS_BLOCKSIZE != 0
162 
163 static const int sqq_table[256] = {
164  0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
165  64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
166  90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
167 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
168 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
169 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
170 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
171 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
172 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
173 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
174 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
175 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
176 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
177 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
178 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
179 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
180 };
181 
182 static INLINE
183 int
184 ss_isqrt(int x) {
185  int y, e;
186 
187  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
188  e = (x & 0xffff0000) ?
189  ((x & 0xff000000) ?
190  24 + lg_table[(x >> 24) & 0xff] :
191  16 + lg_table[(x >> 16) & 0xff]) :
192  ((x & 0x0000ff00) ?
193  8 + lg_table[(x >> 8) & 0xff] :
194  0 + lg_table[(x >> 0) & 0xff]);
195 
196  if(e >= 16) {
197  y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
198  if(e >= 24) { y = (y + 1 + x / y) >> 1; }
199  y = (y + 1 + x / y) >> 1;
200  } else if(e >= 8) {
201  y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
202  } else {
203  return sqq_table[x] >> 4;
204  }
205 
206  return (x < (y * y)) ? y - 1 : y;
207 }
208 
209 #endif /* SS_BLOCKSIZE != 0 */
210 
211 
212 /*---------------------------------------------------------------------------*/
213 
214 /* Compares two suffixes. */
215 static INLINE
216 int
217 ss_compare(const unsigned char *T,
218  const int *p1, const int *p2,
219  int depth) {
220  const unsigned char *U1, *U2, *U1n, *U2n;
221 
222  for(U1 = T + depth + *p1,
223  U2 = T + depth + *p2,
224  U1n = T + *(p1 + 1) + 2,
225  U2n = T + *(p2 + 1) + 2;
226  (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
227  ++U1, ++U2) {
228  }
229 
230  return U1 < U1n ?
231  (U2 < U2n ? *U1 - *U2 : 1) :
232  (U2 < U2n ? -1 : 0);
233 }
234 
235 
236 /*---------------------------------------------------------------------------*/
237 
238 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
239 
240 /* Insertionsort for small size groups */
241 static
242 void
243 ss_insertionsort(const unsigned char *T, const int *PA,
244  int *first, int *last, int depth) {
245  int *i, *j;
246  int t;
247  int r;
248 
249  for(i = last - 2; first <= i; --i) {
250  for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
251  do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
252  if(last <= j) { break; }
253  }
254  if(r == 0) { *j = ~*j; }
255  *(j - 1) = t;
256  }
257 }
258 
259 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
260 
261 
262 /*---------------------------------------------------------------------------*/
263 
264 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
265 
266 static INLINE
267 void
268 ss_fixdown(const unsigned char *Td, const int *PA,
269  int *SA, int i, int size) {
270  int j, k;
271  int v;
272  int c, d, e;
273 
274  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
275  d = Td[PA[SA[k = j++]]];
276  if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
277  if(d <= c) { break; }
278  }
279  SA[i] = v;
280 }
281 
282 /* Simple top-down heapsort. */
283 static
284 void
285 ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
286  int i, m;
287  int t;
288 
289  m = size;
290  if((size % 2) == 0) {
291  m--;
292  if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
293  }
294 
295  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
296  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
297  for(i = m - 1; 0 < i; --i) {
298  t = SA[0], SA[0] = SA[i];
299  ss_fixdown(Td, PA, SA, 0, i);
300  SA[i] = t;
301  }
302 }
303 
304 
305 /*---------------------------------------------------------------------------*/
306 
307 /* Returns the median of three elements. */
308 static INLINE
309 int *
310 ss_median3(const unsigned char *Td, const int *PA,
311  int *v1, int *v2, int *v3) {
312  int *t;
313  if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
314  if(Td[PA[*v2]] > Td[PA[*v3]]) {
315  if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
316  else { return v3; }
317  }
318  return v2;
319 }
320 
321 /* Returns the median of five elements. */
322 static INLINE
323 int *
324 ss_median5(const unsigned char *Td, const int *PA,
325  int *v1, int *v2, int *v3, int *v4, int *v5) {
326  int *t;
327  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
328  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
329  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
330  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
331  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
332  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
333  return v3;
334 }
335 
336 /* Returns the pivot element. */
337 static INLINE
338 int *
339 ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
340  int *middle;
341  int t;
342 
343  t = last - first;
344  middle = first + t / 2;
345 
346  if(t <= 512) {
347  if(t <= 32) {
348  return ss_median3(Td, PA, first, middle, last - 1);
349  } else {
350  t >>= 2;
351  return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
352  }
353  }
354  t >>= 3;
355  first = ss_median3(Td, PA, first, first + t, first + (t << 1));
356  middle = ss_median3(Td, PA, middle - t, middle, middle + t);
357  last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
358  return ss_median3(Td, PA, first, middle, last);
359 }
360 
361 
362 /*---------------------------------------------------------------------------*/
363 
364 /* Binary partition for substrings. */
365 static INLINE
366 int *
367 ss_partition(const int *PA,
368  int *first, int *last, int depth) {
369  int *a, *b;
370  int t;
371  for(a = first - 1, b = last;;) {
372  for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
373  for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
374  if(b <= a) { break; }
375  t = ~*b;
376  *b = *a;
377  *a = t;
378  }
379  if(first < a) { *first = ~*first; }
380  return a;
381 }
382 
383 /* Multikey introsort for medium size groups. */
384 static
385 void
386 ss_mintrosort(const unsigned char *T, const int *PA,
387  int *first, int *last,
388  int depth) {
389 #define STACK_SIZE SS_MISORT_STACKSIZE
390  struct { int *a, *b, c; int d; } stack[STACK_SIZE];
391  const unsigned char *Td;
392  int *a, *b, *c, *d, *e, *f;
393  int s, t;
394  int ssize;
395  int limit;
396  int v, x = 0;
397 
398  for(ssize = 0, limit = ss_ilg(last - first);;) {
399 
400  if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
401 #if 1 < SS_INSERTIONSORT_THRESHOLD
402  if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
403 #endif
404  STACK_POP(first, last, depth, limit);
405  continue;
406  }
407 
408  Td = T + depth;
409  if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
410  if(limit < 0) {
411  for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
412  if((x = Td[PA[*a]]) != v) {
413  if(1 < (a - first)) { break; }
414  v = x;
415  first = a;
416  }
417  }
418  if(Td[PA[*first] - 1] < v) {
419  first = ss_partition(PA, first, a, depth);
420  }
421  if((a - first) <= (last - a)) {
422  if(1 < (a - first)) {
423  STACK_PUSH(a, last, depth, -1);
424  last = a, depth += 1, limit = ss_ilg(a - first);
425  } else {
426  first = a, limit = -1;
427  }
428  } else {
429  if(1 < (last - a)) {
430  STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
431  first = a, limit = -1;
432  } else {
433  last = a, depth += 1, limit = ss_ilg(a - first);
434  }
435  }
436  continue;
437  }
438 
439  /* choose pivot */
440  a = ss_pivot(Td, PA, first, last);
441  v = Td[PA[*a]];
442  SWAP(*first, *a);
443 
444  /* partition */
445  for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
446  if(((a = b) < last) && (x < v)) {
447  for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
448  if(x == v) { SWAP(*b, *a); ++a; }
449  }
450  }
451  for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
452  if((b < (d = c)) && (x > v)) {
453  for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
454  if(x == v) { SWAP(*c, *d); --d; }
455  }
456  }
457  for(; b < c;) {
458  SWAP(*b, *c);
459  for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
460  if(x == v) { SWAP(*b, *a); ++a; }
461  }
462  for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
463  if(x == v) { SWAP(*c, *d); --d; }
464  }
465  }
466 
467  if(a <= d) {
468  c = b - 1;
469 
470  if((s = a - first) > (t = b - a)) { s = t; }
471  for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
472  if((s = d - c) > (t = last - d - 1)) { s = t; }
473  for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
474 
475  a = first + (b - a), c = last - (d - c);
476  b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
477 
478  if((a - first) <= (last - c)) {
479  if((last - c) <= (c - b)) {
480  STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
481  STACK_PUSH(c, last, depth, limit);
482  last = a;
483  } else if((a - first) <= (c - b)) {
484  STACK_PUSH(c, last, depth, limit);
485  STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
486  last = a;
487  } else {
488  STACK_PUSH(c, last, depth, limit);
489  STACK_PUSH(first, a, depth, limit);
490  first = b, last = c, depth += 1, limit = ss_ilg(c - b);
491  }
492  } else {
493  if((a - first) <= (c - b)) {
494  STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
495  STACK_PUSH(first, a, depth, limit);
496  first = c;
497  } else if((last - c) <= (c - b)) {
498  STACK_PUSH(first, a, depth, limit);
499  STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
500  first = c;
501  } else {
502  STACK_PUSH(first, a, depth, limit);
503  STACK_PUSH(c, last, depth, limit);
504  first = b, last = c, depth += 1, limit = ss_ilg(c - b);
505  }
506  }
507  } else {
508  limit += 1;
509  if(Td[PA[*first] - 1] < v) {
510  first = ss_partition(PA, first, last, depth);
511  limit = ss_ilg(last - first);
512  }
513  depth += 1;
514  }
515  }
516 #undef STACK_SIZE
517 }
518 
519 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
520 
521 
522 /*---------------------------------------------------------------------------*/
523 
524 #if SS_BLOCKSIZE != 0
525 
526 static INLINE
527 void
528 ss_blockswap(int *a, int *b, int n) {
529  int t;
530  for(; 0 < n; --n, ++a, ++b) {
531  t = *a, *a = *b, *b = t;
532  }
533 }
534 
535 static INLINE
536 void
537 ss_rotate(int *first, int *middle, int *last) {
538  int *a, *b, t;
539  int l, r;
540  l = middle - first, r = last - middle;
541  for(; (0 < l) && (0 < r);) {
542  if(l == r) { ss_blockswap(first, middle, l); break; }
543  if(l < r) {
544  a = last - 1, b = middle - 1;
545  t = *a;
546  do {
547  *a-- = *b, *b-- = *a;
548  if(b < first) {
549  *a = t;
550  last = a;
551  if((r -= l + 1) <= l) { break; }
552  a -= 1, b = middle - 1;
553  t = *a;
554  }
555  } while(1);
556  } else {
557  a = first, b = middle;
558  t = *a;
559  do {
560  *a++ = *b, *b++ = *a;
561  if(last <= b) {
562  *a = t;
563  first = a + 1;
564  if((l -= r + 1) <= r) { break; }
565  a += 1, b = middle;
566  t = *a;
567  }
568  } while(1);
569  }
570  }
571 }
572 
573 
574 /*---------------------------------------------------------------------------*/
575 
576 static
577 void
578 ss_inplacemerge(const unsigned char *T, const int *PA,
579  int *first, int *middle, int *last,
580  int depth) {
581  const int *p;
582  int *a, *b;
583  int len, half;
584  int q, r;
585  int x;
586 
587  for(;;) {
588  if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
589  else { x = 0; p = PA + *(last - 1); }
590  for(a = first, len = middle - first, half = len >> 1, r = -1;
591  0 < len;
592  len = half, half >>= 1) {
593  b = a + half;
594  q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
595  if(q < 0) {
596  a = b + 1;
597  half -= (len & 1) ^ 1;
598  } else {
599  r = q;
600  }
601  }
602  if(a < middle) {
603  if(r == 0) { *a = ~*a; }
604  ss_rotate(a, middle, last);
605  last -= middle - a;
606  middle = a;
607  if(first == middle) { break; }
608  }
609  --last;
610  if(x != 0) { while(*--last < 0) { } }
611  if(middle == last) { break; }
612  }
613 }
614 
615 
616 /*---------------------------------------------------------------------------*/
617 
618 /* Merge-forward with internal buffer. */
619 static
620 void
621 ss_mergeforward(const unsigned char *T, const int *PA,
622  int *first, int *middle, int *last,
623  int *buf, int depth) {
624  int *a, *b, *c, *bufend;
625  int t;
626  int r;
627 
628  bufend = buf + (middle - first) - 1;
629  ss_blockswap(buf, first, middle - first);
630 
631  for(t = *(a = first), b = buf, c = middle;;) {
632  r = ss_compare(T, PA + *b, PA + *c, depth);
633  if(r < 0) {
634  do {
635  *a++ = *b;
636  if(bufend <= b) { *bufend = t; return; }
637  *b++ = *a;
638  } while(*b < 0);
639  } else if(r > 0) {
640  do {
641  *a++ = *c, *c++ = *a;
642  if(last <= c) {
643  while(b < bufend) { *a++ = *b, *b++ = *a; }
644  *a = *b, *b = t;
645  return;
646  }
647  } while(*c < 0);
648  } else {
649  *c = ~*c;
650  do {
651  *a++ = *b;
652  if(bufend <= b) { *bufend = t; return; }
653  *b++ = *a;
654  } while(*b < 0);
655 
656  do {
657  *a++ = *c, *c++ = *a;
658  if(last <= c) {
659  while(b < bufend) { *a++ = *b, *b++ = *a; }
660  *a = *b, *b = t;
661  return;
662  }
663  } while(*c < 0);
664  }
665  }
666 }
667 
668 /* Merge-backward with internal buffer. */
669 static
670 void
671 ss_mergebackward(const unsigned char *T, const int *PA,
672  int *first, int *middle, int *last,
673  int *buf, int depth) {
674  const int *p1, *p2;
675  int *a, *b, *c, *bufend;
676  int t;
677  int r;
678  int x;
679 
680  bufend = buf + (last - middle) - 1;
681  ss_blockswap(buf, middle, last - middle);
682 
683  x = 0;
684  if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
685  else { p1 = PA + *bufend; }
686  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
687  else { p2 = PA + *(middle - 1); }
688  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
689  r = ss_compare(T, p1, p2, depth);
690  if(0 < r) {
691  if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
692  *a-- = *b;
693  if(b <= buf) { *buf = t; break; }
694  *b-- = *a;
695  if(*b < 0) { p1 = PA + ~*b; x |= 1; }
696  else { p1 = PA + *b; }
697  } else if(r < 0) {
698  if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
699  *a-- = *c, *c-- = *a;
700  if(c < first) {
701  while(buf < b) { *a-- = *b, *b-- = *a; }
702  *a = *b, *b = t;
703  break;
704  }
705  if(*c < 0) { p2 = PA + ~*c; x |= 2; }
706  else { p2 = PA + *c; }
707  } else {
708  if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
709  *a-- = ~*b;
710  if(b <= buf) { *buf = t; break; }
711  *b-- = *a;
712  if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
713  *a-- = *c, *c-- = *a;
714  if(c < first) {
715  while(buf < b) { *a-- = *b, *b-- = *a; }
716  *a = *b, *b = t;
717  break;
718  }
719  if(*b < 0) { p1 = PA + ~*b; x |= 1; }
720  else { p1 = PA + *b; }
721  if(*c < 0) { p2 = PA + ~*c; x |= 2; }
722  else { p2 = PA + *c; }
723  }
724  }
725 }
726 
727 /* D&C based merge. */
728 static
729 void
730 ss_swapmerge(const unsigned char *T, const int *PA,
731  int *first, int *middle, int *last,
732  int *buf, int bufsize, int depth) {
733 #define STACK_SIZE SS_SMERGE_STACKSIZE
734 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
735 #define MERGE_CHECK(a, b, c)\
736  do {\
737  if(((c) & 1) ||\
738  (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
739  *(a) = ~*(a);\
740  }\
741  if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
742  *(b) = ~*(b);\
743  }\
744  } while(0)
745  struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
746  int *l, *r, *lm, *rm;
747  int m, len, half;
748  int ssize;
749  int check, next;
750 
751  for(check = 0, ssize = 0;;) {
752  if((last - middle) <= bufsize) {
753  if((first < middle) && (middle < last)) {
754  ss_mergebackward(T, PA, first, middle, last, buf, depth);
755  }
756  MERGE_CHECK(first, last, check);
757  STACK_POP(first, middle, last, check);
758  continue;
759  }
760 
761  if((middle - first) <= bufsize) {
762  if(first < middle) {
763  ss_mergeforward(T, PA, first, middle, last, buf, depth);
764  }
765  MERGE_CHECK(first, last, check);
766  STACK_POP(first, middle, last, check);
767  continue;
768  }
769 
770  for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
771  0 < len;
772  len = half, half >>= 1) {
773  if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
774  PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
775  m += half + 1;
776  half -= (len & 1) ^ 1;
777  }
778  }
779 
780  if(0 < m) {
781  lm = middle - m, rm = middle + m;
782  ss_blockswap(lm, middle, m);
783  l = r = middle, next = 0;
784  if(rm < last) {
785  if(*rm < 0) {
786  *rm = ~*rm;
787  if(first < lm) { for(; *--l < 0;) { } next |= 4; }
788  next |= 1;
789  } else if(first < lm) {
790  for(; *r < 0; ++r) { }
791  next |= 2;
792  }
793  }
794 
795  if((l - first) <= (last - r)) {
796  STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
797  middle = lm, last = l, check = (check & 3) | (next & 4);
798  } else {
799  if((next & 2) && (r == middle)) { next ^= 6; }
800  STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
801  first = r, middle = rm, check = (next & 3) | (check & 4);
802  }
803  } else {
804  if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
805  *middle = ~*middle;
806  }
807  MERGE_CHECK(first, last, check);
808  STACK_POP(first, middle, last, check);
809  }
810  }
811 #undef STACK_SIZE
812 }
813 
814 #endif /* SS_BLOCKSIZE != 0 */
815 
816 
817 /*---------------------------------------------------------------------------*/
818 
819 /* Substring sort */
820 static
821 void
822 sssort(const unsigned char *T, const int *PA,
823  int *first, int *last,
824  int *buf, int bufsize,
825  int depth, int n, int lastsuffix) {
826  int *a;
827 #if SS_BLOCKSIZE != 0
828  int *b, *middle, *curbuf;
829  int j, k, curbufsize, limit;
830 #endif
831  int i;
832 
833  if(lastsuffix != 0) { ++first; }
834 
835 #if SS_BLOCKSIZE == 0
836  ss_mintrosort(T, PA, first, last, depth);
837 #else
838  if((bufsize < SS_BLOCKSIZE) &&
839  (bufsize < (last - first)) &&
840  (bufsize < (limit = ss_isqrt(last - first)))) {
841  if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
842  buf = middle = last - limit, bufsize = limit;
843  } else {
844  middle = last, limit = 0;
845  }
846  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
847 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
848  ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
849 #elif 1 < SS_BLOCKSIZE
850  ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
851 #endif
852  curbufsize = last - (a + SS_BLOCKSIZE);
853  curbuf = a + SS_BLOCKSIZE;
854  if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
855  for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
856  ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
857  }
858  }
859 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
860  ss_mintrosort(T, PA, a, middle, depth);
861 #elif 1 < SS_BLOCKSIZE
862  ss_insertionsort(T, PA, a, middle, depth);
863 #endif
864  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
865  if(i & 1) {
866  ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
867  a -= k;
868  }
869  }
870  if(limit != 0) {
871 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
872  ss_mintrosort(T, PA, middle, last, depth);
873 #elif 1 < SS_BLOCKSIZE
874  ss_insertionsort(T, PA, middle, last, depth);
875 #endif
876  ss_inplacemerge(T, PA, first, middle, last, depth);
877  }
878 #endif
879 
880  if(lastsuffix != 0) {
881  /* Insert last type B* suffix. */
882  int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
883  for(a = first, i = *(first - 1);
884  (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
885  ++a) {
886  *(a - 1) = *a;
887  }
888  *(a - 1) = i;
889  }
890 }
891 
892 
893 /*---------------------------------------------------------------------------*/
894 
895 static INLINE
896 int
897 tr_ilg(int n) {
898  return (n & 0xffff0000) ?
899  ((n & 0xff000000) ?
900  24 + lg_table[(n >> 24) & 0xff] :
901  16 + lg_table[(n >> 16) & 0xff]) :
902  ((n & 0x0000ff00) ?
903  8 + lg_table[(n >> 8) & 0xff] :
904  0 + lg_table[(n >> 0) & 0xff]);
905 }
906 
907 
908 /*---------------------------------------------------------------------------*/
909 
910 /* Simple insertionsort for small size groups. */
911 static
912 void
913 tr_insertionsort(const int *ISAd, int *first, int *last) {
914  int *a, *b;
915  int t, r;
916 
917  for(a = first + 1; a < last; ++a) {
918  for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
919  do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
920  if(b < first) { break; }
921  }
922  if(r == 0) { *b = ~*b; }
923  *(b + 1) = t;
924  }
925 }
926 
927 
928 /*---------------------------------------------------------------------------*/
929 
930 static INLINE
931 void
932 tr_fixdown(const int *ISAd, int *SA, int i, int size) {
933  int j, k;
934  int v;
935  int c, d, e;
936 
937  for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
938  d = ISAd[SA[k = j++]];
939  if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
940  if(d <= c) { break; }
941  }
942  SA[i] = v;
943 }
944 
945 /* Simple top-down heapsort. */
946 static
947 void
948 tr_heapsort(const int *ISAd, int *SA, int size) {
949  int i, m;
950  int t;
951 
952  m = size;
953  if((size % 2) == 0) {
954  m--;
955  if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
956  }
957 
958  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
959  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
960  for(i = m - 1; 0 < i; --i) {
961  t = SA[0], SA[0] = SA[i];
962  tr_fixdown(ISAd, SA, 0, i);
963  SA[i] = t;
964  }
965 }
966 
967 
968 /*---------------------------------------------------------------------------*/
969 
970 /* Returns the median of three elements. */
971 static INLINE
972 int *
973 tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
974  int *t;
975  if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
976  if(ISAd[*v2] > ISAd[*v3]) {
977  if(ISAd[*v1] > ISAd[*v3]) { return v1; }
978  else { return v3; }
979  }
980  return v2;
981 }
982 
983 /* Returns the median of five elements. */
984 static INLINE
985 int *
986 tr_median5(const int *ISAd,
987  int *v1, int *v2, int *v3, int *v4, int *v5) {
988  int *t;
989  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
990  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
991  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
992  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
993  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
994  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
995  return v3;
996 }
997 
998 /* Returns the pivot element. */
999 static INLINE
1000 int *
1001 tr_pivot(const int *ISAd, int *first, int *last) {
1002  int *middle;
1003  int t;
1004 
1005  t = last - first;
1006  middle = first + t / 2;
1007 
1008  if(t <= 512) {
1009  if(t <= 32) {
1010  return tr_median3(ISAd, first, middle, last - 1);
1011  } else {
1012  t >>= 2;
1013  return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1014  }
1015  }
1016  t >>= 3;
1017  first = tr_median3(ISAd, first, first + t, first + (t << 1));
1018  middle = tr_median3(ISAd, middle - t, middle, middle + t);
1019  last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1020  return tr_median3(ISAd, first, middle, last);
1021 }
1022 
1023 
1024 /*---------------------------------------------------------------------------*/
1025 
1026 typedef struct _trbudget_t trbudget_t;
1027 struct _trbudget_t {
1028  int chance;
1029  int remain;
1030  int incval;
1031  int count;
1032 };
1033 
1034 static INLINE
1035 void
1036 trbudget_init(trbudget_t *budget, int chance, int incval) {
1037  budget->chance = chance;
1038  budget->remain = budget->incval = incval;
1039 }
1040 
1041 static INLINE
1042 int
1043 trbudget_check(trbudget_t *budget, int size) {
1044  if(size <= budget->remain) { budget->remain -= size; return 1; }
1045  if(budget->chance == 0) { budget->count += size; return 0; }
1046  budget->remain += budget->incval - size;
1047  budget->chance -= 1;
1048  return 1;
1049 }
1050 
1051 
1052 /*---------------------------------------------------------------------------*/
1053 
1054 static INLINE
1055 void
1056 tr_partition(const int *ISAd,
1057  int *first, int *middle, int *last,
1058  int **pa, int **pb, int v) {
1059  int *a, *b, *c, *d, *e, *f;
1060  int t, s;
1061  int x = 0;
1062 
1063  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1064  if(((a = b) < last) && (x < v)) {
1065  for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1066  if(x == v) { SWAP(*b, *a); ++a; }
1067  }
1068  }
1069  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1070  if((b < (d = c)) && (x > v)) {
1071  for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1072  if(x == v) { SWAP(*c, *d); --d; }
1073  }
1074  }
1075  for(; b < c;) {
1076  SWAP(*b, *c);
1077  for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1078  if(x == v) { SWAP(*b, *a); ++a; }
1079  }
1080  for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1081  if(x == v) { SWAP(*c, *d); --d; }
1082  }
1083  }
1084 
1085  if(a <= d) {
1086  c = b - 1;
1087  if((s = a - first) > (t = b - a)) { s = t; }
1088  for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1089  if((s = d - c) > (t = last - d - 1)) { s = t; }
1090  for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1091  first += (b - a), last -= (d - c);
1092  }
1093  *pa = first, *pb = last;
1094 }
1095 
1096 static
1097 void
1098 tr_copy(int *ISA, const int *SA,
1099  int *first, int *a, int *b, int *last,
1100  int depth) {
1101  /* sort suffixes of middle partition
1102  by using sorted order of suffixes of left and right partition. */
1103  int *c, *d, *e;
1104  int s, v;
1105 
1106  v = b - SA - 1;
1107  for(c = first, d = a - 1; c <= d; ++c) {
1108  if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1109  *++d = s;
1110  ISA[s] = d - SA;
1111  }
1112  }
1113  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1114  if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1115  *--d = s;
1116  ISA[s] = d - SA;
1117  }
1118  }
1119 }
1120 
1121 static
1122 void
1123 tr_partialcopy(int *ISA, const int *SA,
1124  int *first, int *a, int *b, int *last,
1125  int depth) {
1126  int *c, *d, *e;
1127  int s, v;
1128  int rank, lastrank, newrank = -1;
1129 
1130  v = b - SA - 1;
1131  lastrank = -1;
1132  for(c = first, d = a - 1; c <= d; ++c) {
1133  if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1134  *++d = s;
1135  rank = ISA[s + depth];
1136  if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1137  ISA[s] = newrank;
1138  }
1139  }
1140 
1141  lastrank = -1;
1142  for(e = d; first <= e; --e) {
1143  rank = ISA[*e];
1144  if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1145  if(newrank != rank) { ISA[*e] = newrank; }
1146  }
1147 
1148  lastrank = -1;
1149  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1150  if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1151  *--d = s;
1152  rank = ISA[s + depth];
1153  if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1154  ISA[s] = newrank;
1155  }
1156  }
1157 }
1158 
1159 static
1160 void
1161 tr_introsort(int *ISA, const int *ISAd,
1162  int *SA, int *first, int *last,
1163  trbudget_t *budget) {
1164 #define STACK_SIZE TR_STACKSIZE
1165  struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1166  int *a, *b, *c;
1167  int t;
1168  int v, x = 0;
1169  int incr = ISAd - ISA;
1170  int limit, next;
1171  int ssize, trlink = -1;
1172 
1173  for(ssize = 0, limit = tr_ilg(last - first);;) {
1174 
1175  if(limit < 0) {
1176  if(limit == -1) {
1177  /* tandem repeat partition */
1178  tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1179 
1180  /* update ranks */
1181  if(a < last) {
1182  for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1183  }
1184  if(b < last) {
1185  for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1186  }
1187 
1188  /* push */
1189  if(1 < (b - a)) {
1190  STACK_PUSH5(NULL, a, b, 0, 0);
1191  STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1192  trlink = ssize - 2;
1193  }
1194  if((a - first) <= (last - b)) {
1195  if(1 < (a - first)) {
1196  STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1197  last = a, limit = tr_ilg(a - first);
1198  } else if(1 < (last - b)) {
1199  first = b, limit = tr_ilg(last - b);
1200  } else {
1201  STACK_POP5(ISAd, first, last, limit, trlink);
1202  }
1203  } else {
1204  if(1 < (last - b)) {
1205  STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1206  first = b, limit = tr_ilg(last - b);
1207  } else if(1 < (a - first)) {
1208  last = a, limit = tr_ilg(a - first);
1209  } else {
1210  STACK_POP5(ISAd, first, last, limit, trlink);
1211  }
1212  }
1213  } else if(limit == -2) {
1214  /* tandem repeat copy */
1215  a = stack[--ssize].b, b = stack[ssize].c;
1216  if(stack[ssize].d == 0) {
1217  tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1218  } else {
1219  if(0 <= trlink) { stack[trlink].d = -1; }
1220  tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1221  }
1222  STACK_POP5(ISAd, first, last, limit, trlink);
1223  } else {
1224  /* sorted partition */
1225  if(0 <= *first) {
1226  a = first;
1227  do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1228  first = a;
1229  }
1230  if(first < last) {
1231  a = first; do { *a = ~*a; } while(*++a < 0);
1232  next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1233  if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1234 
1235  /* push */
1236  if(trbudget_check(budget, a - first)) {
1237  if((a - first) <= (last - a)) {
1238  STACK_PUSH5(ISAd, a, last, -3, trlink);
1239  ISAd += incr, last = a, limit = next;
1240  } else {
1241  if(1 < (last - a)) {
1242  STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1243  first = a, limit = -3;
1244  } else {
1245  ISAd += incr, last = a, limit = next;
1246  }
1247  }
1248  } else {
1249  if(0 <= trlink) { stack[trlink].d = -1; }
1250  if(1 < (last - a)) {
1251  first = a, limit = -3;
1252  } else {
1253  STACK_POP5(ISAd, first, last, limit, trlink);
1254  }
1255  }
1256  } else {
1257  STACK_POP5(ISAd, first, last, limit, trlink);
1258  }
1259  }
1260  continue;
1261  }
1262 
1263  if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1264  tr_insertionsort(ISAd, first, last);
1265  limit = -3;
1266  continue;
1267  }
1268 
1269  if(limit-- == 0) {
1270  tr_heapsort(ISAd, first, last - first);
1271  for(a = last - 1; first < a; a = b) {
1272  for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1273  }
1274  limit = -3;
1275  continue;
1276  }
1277 
1278  /* choose pivot */
1279  a = tr_pivot(ISAd, first, last);
1280  SWAP(*first, *a);
1281  v = ISAd[*first];
1282 
1283  /* partition */
1284  tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1285  if((last - first) != (b - a)) {
1286  next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1287 
1288  /* update ranks */
1289  for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1290  if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1291 
1292  /* push */
1293  if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1294  if((a - first) <= (last - b)) {
1295  if((last - b) <= (b - a)) {
1296  if(1 < (a - first)) {
1297  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1298  STACK_PUSH5(ISAd, b, last, limit, trlink);
1299  last = a;
1300  } else if(1 < (last - b)) {
1301  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1302  first = b;
1303  } else {
1304  ISAd += incr, first = a, last = b, limit = next;
1305  }
1306  } else if((a - first) <= (b - a)) {
1307  if(1 < (a - first)) {
1308  STACK_PUSH5(ISAd, b, last, limit, trlink);
1309  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1310  last = a;
1311  } else {
1312  STACK_PUSH5(ISAd, b, last, limit, trlink);
1313  ISAd += incr, first = a, last = b, limit = next;
1314  }
1315  } else {
1316  STACK_PUSH5(ISAd, b, last, limit, trlink);
1317  STACK_PUSH5(ISAd, first, a, limit, trlink);
1318  ISAd += incr, first = a, last = b, limit = next;
1319  }
1320  } else {
1321  if((a - first) <= (b - a)) {
1322  if(1 < (last - b)) {
1323  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1324  STACK_PUSH5(ISAd, first, a, limit, trlink);
1325  first = b;
1326  } else if(1 < (a - first)) {
1327  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1328  last = a;
1329  } else {
1330  ISAd += incr, first = a, last = b, limit = next;
1331  }
1332  } else if((last - b) <= (b - a)) {
1333  if(1 < (last - b)) {
1334  STACK_PUSH5(ISAd, first, a, limit, trlink);
1335  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1336  first = b;
1337  } else {
1338  STACK_PUSH5(ISAd, first, a, limit, trlink);
1339  ISAd += incr, first = a, last = b, limit = next;
1340  }
1341  } else {
1342  STACK_PUSH5(ISAd, first, a, limit, trlink);
1343  STACK_PUSH5(ISAd, b, last, limit, trlink);
1344  ISAd += incr, first = a, last = b, limit = next;
1345  }
1346  }
1347  } else {
1348  if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1349  if((a - first) <= (last - b)) {
1350  if(1 < (a - first)) {
1351  STACK_PUSH5(ISAd, b, last, limit, trlink);
1352  last = a;
1353  } else if(1 < (last - b)) {
1354  first = b;
1355  } else {
1356  STACK_POP5(ISAd, first, last, limit, trlink);
1357  }
1358  } else {
1359  if(1 < (last - b)) {
1360  STACK_PUSH5(ISAd, first, a, limit, trlink);
1361  first = b;
1362  } else if(1 < (a - first)) {
1363  last = a;
1364  } else {
1365  STACK_POP5(ISAd, first, last, limit, trlink);
1366  }
1367  }
1368  }
1369  } else {
1370  if(trbudget_check(budget, last - first)) {
1371  limit = tr_ilg(last - first), ISAd += incr;
1372  } else {
1373  if(0 <= trlink) { stack[trlink].d = -1; }
1374  STACK_POP5(ISAd, first, last, limit, trlink);
1375  }
1376  }
1377  }
1378 #undef STACK_SIZE
1379 }
1380 
1381 
1382 
1383 /*---------------------------------------------------------------------------*/
1384 
1385 /* Tandem repeat sort */
1386 static
1387 void
1388 trsort(int *ISA, int *SA, int n, int depth) {
1389  int *ISAd;
1390  int *first, *last;
1391  trbudget_t budget;
1392  int t, skip, unsorted;
1393 
1394  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1395 /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1396  for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1397  first = SA;
1398  skip = 0;
1399  unsorted = 0;
1400  do {
1401  if((t = *first) < 0) { first -= t; skip += t; }
1402  else {
1403  if(skip != 0) { *(first + skip) = skip; skip = 0; }
1404  last = SA + ISA[t] + 1;
1405  if(1 < (last - first)) {
1406  budget.count = 0;
1407  tr_introsort(ISA, ISAd, SA, first, last, &budget);
1408  if(budget.count != 0) { unsorted += budget.count; }
1409  else { skip = first - last; }
1410  } else if((last - first) == 1) {
1411  skip = -1;
1412  }
1413  first = last;
1414  }
1415  } while(first < (SA + n));
1416  if(skip != 0) { *(first + skip) = skip; }
1417  if(unsorted == 0) { break; }
1418  }
1419 }
1420 
1421 
1422 /*---------------------------------------------------------------------------*/
1423 
1424 /* Sorts suffixes of type B*. */
1425 static
1426 int
1427 sort_typeBstar(const unsigned char *T, int *SA,
1428  int *bucket_A, int *bucket_B,
1429  int n) {
1430  int *PAb, *ISAb, *buf;
1431 #ifdef _OPENMP
1432  int *curbuf;
1433  int l;
1434 #endif
1435  int i, j, k, t, m, bufsize;
1436  int c0, c1;
1437 #ifdef _OPENMP
1438  int d0, d1;
1439  int tmp;
1440 #endif
1441 
1442  /* Initialize bucket arrays. */
1443  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
1444  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
1445 
1446  /* Count the number of occurrences of the first one or two characters of each
1447  type A, B and B* suffix. Moreover, store the beginning position of all
1448  type B* suffixes into the array SA. */
1449  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1450  /* type A suffix. */
1451  do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1452  if(0 <= i) {
1453  /* type B* suffix. */
1454  ++BUCKET_BSTAR(c0, c1);
1455  SA[--m] = i;
1456  /* type B suffix. */
1457  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1458  ++BUCKET_B(c0, c1);
1459  }
1460  }
1461  }
1462  m = n - m;
1463 /*
1464 note:
1465  A type B* suffix is lexicographically smaller than a type B suffix that
1466  begins with the same first two characters.
1467 */
1468 
1469  /* Calculate the index of start/end point of each bucket. */
1470  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1471  t = i + BUCKET_A(c0);
1472  BUCKET_A(c0) = i + j; /* start point */
1473  i = t + BUCKET_B(c0, c0);
1474  for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1475  j += BUCKET_BSTAR(c0, c1);
1476  BUCKET_BSTAR(c0, c1) = j; /* end point */
1477  i += BUCKET_B(c0, c1);
1478  }
1479  }
1480 
1481  if(0 < m) {
1482  /* Sort the type B* suffixes by their first two characters. */
1483  PAb = SA + n - m; ISAb = SA + m;
1484  for(i = m - 2; 0 <= i; --i) {
1485  t = PAb[i], c0 = T[t], c1 = T[t + 1];
1486  SA[--BUCKET_BSTAR(c0, c1)] = i;
1487  }
1488  t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1489  SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1490 
1491  /* Sort the type B* substrings using sssort. */
1492 #ifdef _OPENMP
1493  tmp = omp_get_max_threads();
1494  buf = SA + m, bufsize = (n - (2 * m)) / tmp;
1495  c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1496 #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
1497  {
1498  tmp = omp_get_thread_num();
1499  curbuf = buf + tmp * bufsize;
1500  k = 0;
1501  for(;;) {
1502  #pragma omp critical(sssort_lock)
1503  {
1504  if(0 < (l = j)) {
1505  d0 = c0, d1 = c1;
1506  do {
1507  k = BUCKET_BSTAR(d0, d1);
1508  if(--d1 <= d0) {
1509  d1 = ALPHABET_SIZE - 1;
1510  if(--d0 < 0) { break; }
1511  }
1512  } while(((l - k) <= 1) && (0 < (l = k)));
1513  c0 = d0, c1 = d1, j = k;
1514  }
1515  }
1516  if(l == 0) { break; }
1517  sssort(T, PAb, SA + k, SA + l,
1518  curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1519  }
1520  }
1521 #else
1522  buf = SA + m, bufsize = n - (2 * m);
1523  for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1524  for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1525  i = BUCKET_BSTAR(c0, c1);
1526  if(1 < (j - i)) {
1527  sssort(T, PAb, SA + i, SA + j,
1528  buf, bufsize, 2, n, *(SA + i) == (m - 1));
1529  }
1530  }
1531  }
1532 #endif
1533 
1534  /* Compute ranks of type B* substrings. */
1535  for(i = m - 1; 0 <= i; --i) {
1536  if(0 <= SA[i]) {
1537  j = i;
1538  do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1539  SA[i + 1] = i - j;
1540  if(i <= 0) { break; }
1541  }
1542  j = i;
1543  do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1544  ISAb[SA[i]] = j;
1545  }
1546 
1547  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1548  trsort(ISAb, SA, m, 1);
1549 
1550  /* Set the sorted order of tyoe B* suffixes. */
1551  for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1552  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1553  if(0 <= i) {
1554  t = i;
1555  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1556  SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1557  }
1558  }
1559 
1560  /* Calculate the index of start/end point of each bucket. */
1561  BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1562  for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1563  i = BUCKET_A(c0 + 1) - 1;
1564  for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1565  t = i - BUCKET_B(c0, c1);
1566  BUCKET_B(c0, c1) = i; /* end point */
1567 
1568  /* Move all type B* suffixes to the correct position. */
1569  for(i = t, j = BUCKET_BSTAR(c0, c1);
1570  j <= k;
1571  --i, --k) { SA[i] = SA[k]; }
1572  }
1573  BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1574  BUCKET_B(c0, c0) = i; /* end point */
1575  }
1576  }
1577 
1578  return m;
1579 }
1580 
1581 /* Constructs the suffix array by using the sorted order of type B* suffixes. */
1582 static
1583 void
1584 construct_SA(const unsigned char *T, int *SA,
1585  int *bucket_A, int *bucket_B,
1586  int n, int m) {
1587  int *i, *j, *k;
1588  int s;
1589  int c0, c1, c2;
1590 
1591  if(0 < m) {
1592  /* Construct the sorted order of type B suffixes by using
1593  the sorted order of type B* suffixes. */
1594  for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1595  /* Scan the suffix array from right to left. */
1596  for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1597  j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1598  i <= j;
1599  --j) {
1600  if(0 < (s = *j)) {
1601  assert(T[s] == c1);
1602  assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1603  assert(T[s - 1] <= T[s]);
1604  *j = ~s;
1605  c0 = T[--s];
1606  if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1607  if(c0 != c2) {
1608  if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1609  k = SA + BUCKET_B(c2 = c0, c1);
1610  }
1611  assert(k < j);
1612  *k-- = s;
1613  } else {
1614  assert(((s == 0) && (T[s] == c1)) || (s < 0));
1615  *j = ~s;
1616  }
1617  }
1618  }
1619  }
1620 
1621  /* Construct the suffix array by using
1622  the sorted order of type B suffixes. */
1623  k = SA + BUCKET_A(c2 = T[n - 1]);
1624  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1625  /* Scan the suffix array from left to right. */
1626  for(i = SA, j = SA + n; i < j; ++i) {
1627  if(0 < (s = *i)) {
1628  assert(T[s - 1] >= T[s]);
1629  c0 = T[--s];
1630  if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1631  if(c0 != c2) {
1632  BUCKET_A(c2) = k - SA;
1633  k = SA + BUCKET_A(c2 = c0);
1634  }
1635  assert(i < k);
1636  *k++ = s;
1637  } else {
1638  assert(s < 0);
1639  *i = ~s;
1640  }
1641  }
1642 }
1643 
1644 /* Constructs the burrows-wheeler transformed string directly
1645  by using the sorted order of type B* suffixes. */
1646 static
1647 int
1648 construct_BWT(const unsigned char *T, int *SA,
1649  int *bucket_A, int *bucket_B,
1650  int n, int m) {
1651  int *i, *j, *k, *orig;
1652  int s;
1653  int c0, c1, c2;
1654 
1655  if(0 < m) {
1656  /* Construct the sorted order of type B suffixes by using
1657  the sorted order of type B* suffixes. */
1658  for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1659  /* Scan the suffix array from right to left. */
1660  for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1661  j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1662  i <= j;
1663  --j) {
1664  if(0 < (s = *j)) {
1665  assert(T[s] == c1);
1666  assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1667  assert(T[s - 1] <= T[s]);
1668  c0 = T[--s];
1669  *j = ~((int)c0);
1670  if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1671  if(c0 != c2) {
1672  if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1673  k = SA + BUCKET_B(c2 = c0, c1);
1674  }
1675  assert(k < j);
1676  *k-- = s;
1677  } else if(s != 0) {
1678  *j = ~s;
1679 #ifndef NDEBUG
1680  } else {
1681  assert(T[s] == c1);
1682 #endif
1683  }
1684  }
1685  }
1686  }
1687 
1688  /* Construct the BWTed string by using
1689  the sorted order of type B suffixes. */
1690  k = SA + BUCKET_A(c2 = T[n - 1]);
1691  *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
1692  /* Scan the suffix array from left to right. */
1693  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1694  if(0 < (s = *i)) {
1695  assert(T[s - 1] >= T[s]);
1696  c0 = T[--s];
1697  *i = c0;
1698  if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1699  if(c0 != c2) {
1700  BUCKET_A(c2) = k - SA;
1701  k = SA + BUCKET_A(c2 = c0);
1702  }
1703  assert(i < k);
1704  *k++ = s;
1705  } else if(s != 0) {
1706  *i = ~s;
1707  } else {
1708  orig = i;
1709  }
1710  }
1711 
1712  return orig - SA;
1713 }
1714 
1715 
1716 /*---------------------------------------------------------------------------*/
1717 
1718 /*- Function -*/
1719 
1720 int
1721 divsufsort(const unsigned char *T, int *SA, int n) {
1722  int *bucket_A, *bucket_B;
1723  int m;
1724  int err = 0;
1725 
1726  /* Check arguments. */
1727  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
1728  else if(n == 0) { return 0; }
1729  else if(n == 1) { SA[0] = 0; return 0; }
1730  else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
1731 
1732  bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1733  bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1734 
1735  /* Suffixsort. */
1736  if((bucket_A != NULL) && (bucket_B != NULL)) {
1737  m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
1738  construct_SA(T, SA, bucket_A, bucket_B, n, m);
1739  } else {
1740  err = -2;
1741  }
1742 
1743  free(bucket_B);
1744  free(bucket_A);
1745 
1746  return err;
1747 }
1748 
1749 int
1750 divbwt(const unsigned char *T, unsigned char *U, int *A, int n) {
1751  int *B;
1752  int *bucket_A, *bucket_B;
1753  int m, pidx, i;
1754 
1755  /* Check arguments. */
1756  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
1757  else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
1758 
1759  if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
1760  bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1761  bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1762 
1763  /* Burrows-Wheeler Transform. */
1764  if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1765  m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
1766  pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
1767 
1768  /* Copy to output string. */
1769  U[0] = T[n - 1];
1770  for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
1771  for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
1772  pidx += 1;
1773  } else {
1774  pidx = -2;
1775  }
1776 
1777  free(bucket_B);
1778  free(bucket_A);
1779  if(A == NULL) { free(B); }
1780 
1781  return pidx;
1782 }