37 #define INLINE __inline
38 #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
41 #if !defined(ALPHABET_SIZE)
42 # define ALPHABET_SIZE (256)
44 #define BUCKET_A_SIZE (ALPHABET_SIZE)
45 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
46 #if defined(SS_INSERTIONSORT_THRESHOLD)
47 # if SS_INSERTIONSORT_THRESHOLD < 1
48 # undef SS_INSERTIONSORT_THRESHOLD
49 # define SS_INSERTIONSORT_THRESHOLD (1)
52 # define SS_INSERTIONSORT_THRESHOLD (8)
54 #if defined(SS_BLOCKSIZE)
57 # define SS_BLOCKSIZE (0)
58 # elif 32768 <= SS_BLOCKSIZE
60 # define SS_BLOCKSIZE (32767)
63 # define SS_BLOCKSIZE (1024)
67 # define SS_MISORT_STACKSIZE (96)
68 #elif SS_BLOCKSIZE <= 4096
69 # define SS_MISORT_STACKSIZE (16)
71 # define SS_MISORT_STACKSIZE (24)
73 #define SS_SMERGE_STACKSIZE (32)
74 #define TR_INSERTIONSORT_THRESHOLD (8)
75 #define TR_STACKSIZE (64)
80 # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
83 # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
86 # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
88 #define STACK_PUSH(_a, _b, _c, _d)\
90 assert(ssize < STACK_SIZE);\
91 stack[ssize].a = (_a), stack[ssize].b = (_b),\
92 stack[ssize].c = (_c), stack[ssize++].d = (_d);\
94 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
96 assert(ssize < STACK_SIZE);\
97 stack[ssize].a = (_a), stack[ssize].b = (_b),\
98 stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
100 #define STACK_POP(_a, _b, _c, _d)\
103 if(ssize == 0) { return; }\
104 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
105 (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
107 #define STACK_POP5(_a, _b, _c, _d, _e)\
110 if(ssize == 0) { return; }\
111 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
112 (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
114 #define BUCKET_A(_c0) bucket_A[(_c0)]
115 #if ALPHABET_SIZE == 256
116 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
117 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
119 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
120 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
126 static const int lg_table[256]= {
127 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
128 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
129 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
130 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
131 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
132 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
133 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
134 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
137 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
142 #if SS_BLOCKSIZE == 0
143 return (n & 0xffff0000) ?
145 24 + lg_table[(n >> 24) & 0xff] :
146 16 + lg_table[(n >> 16) & 0xff]) :
148 8 + lg_table[(n >> 8) & 0xff] :
149 0 + lg_table[(n >> 0) & 0xff]);
150 #elif SS_BLOCKSIZE < 256
153 return (n & 0xff00) ?
154 8 + lg_table[(n >> 8) & 0xff] :
155 0 + lg_table[(n >> 0) & 0xff];
161 #if SS_BLOCKSIZE != 0
163 static const int sqq_table[256] = {
164 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
165 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
166 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
167 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
168 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
169 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
170 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
171 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
172 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
173 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
174 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
175 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
176 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
177 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
178 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
179 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
188 e = (x & 0xffff0000) ?
190 24 + lg_table[(x >> 24) & 0xff] :
191 16 + lg_table[(x >> 16) & 0xff]) :
193 8 + lg_table[(x >> 8) & 0xff] :
194 0 + lg_table[(x >> 0) & 0xff]);
197 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
198 if(e >= 24) { y = (y + 1 + x / y) >> 1; }
199 y = (y + 1 + x / y) >> 1;
201 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
203 return sqq_table[x] >> 4;
206 return (x < (y * y)) ? y - 1 : y;
217 ss_compare(
const unsigned char *T,
218 const int *p1,
const int *p2,
220 const unsigned char *U1, *U2, *U1n, *U2n;
222 for(U1 = T + depth + *p1,
223 U2 = T + depth + *p2,
224 U1n = T + *(p1 + 1) + 2,
225 U2n = T + *(p2 + 1) + 2;
226 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
231 (U2 < U2n ? *U1 - *U2 : 1) :
238 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
243 ss_insertionsort(
const unsigned char *T,
const int *PA,
244 int *first,
int *last,
int depth) {
249 for(i = last - 2; first <= i; --i) {
250 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
251 do { *(j - 1) = *j; }
while((++j < last) && (*j < 0));
252 if(last <= j) {
break; }
254 if(r == 0) { *j = ~*j; }
264 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
268 ss_fixdown(
const unsigned char *Td,
const int *PA,
269 int *SA,
int i,
int size) {
274 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
275 d = Td[PA[SA[k = j++]]];
276 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
277 if(d <= c) {
break; }
285 ss_heapsort(
const unsigned char *Td,
const int *PA,
int *SA,
int size) {
290 if((size % 2) == 0) {
292 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) {
SWAP(SA[m], SA[m / 2]); }
295 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
296 if((size % 2) == 0) {
SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
297 for(i = m - 1; 0 < i; --i) {
298 t = SA[0], SA[0] = SA[i];
299 ss_fixdown(Td, PA, SA, 0, i);
310 ss_median3(
const unsigned char *Td,
const int *PA,
311 int *v1,
int *v2,
int *v3) {
313 if(Td[PA[*v1]] > Td[PA[*v2]]) {
SWAP(v1, v2); }
314 if(Td[PA[*v2]] > Td[PA[*v3]]) {
315 if(Td[PA[*v1]] > Td[PA[*v3]]) {
return v1; }
324 ss_median5(
const unsigned char *Td,
const int *PA,
325 int *v1,
int *v2,
int *v3,
int *v4,
int *v5) {
327 if(Td[PA[*v2]] > Td[PA[*v3]]) {
SWAP(v2, v3); }
328 if(Td[PA[*v4]] > Td[PA[*v5]]) {
SWAP(v4, v5); }
329 if(Td[PA[*v2]] > Td[PA[*v4]]) {
SWAP(v2, v4);
SWAP(v3, v5); }
330 if(Td[PA[*v1]] > Td[PA[*v3]]) {
SWAP(v1, v3); }
331 if(Td[PA[*v1]] > Td[PA[*v4]]) {
SWAP(v1, v4);
SWAP(v3, v5); }
332 if(Td[PA[*v3]] > Td[PA[*v4]]) {
return v4; }
339 ss_pivot(
const unsigned char *Td,
const int *PA,
int *first,
int *last) {
344 middle = first + t / 2;
348 return ss_median3(Td, PA, first, middle, last - 1);
351 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
355 first = ss_median3(Td, PA, first, first + t, first + (t << 1));
356 middle = ss_median3(Td, PA, middle - t, middle, middle + t);
357 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
358 return ss_median3(Td, PA, first, middle, last);
367 ss_partition(
const int *PA,
368 int *first,
int *last,
int depth) {
371 for(a = first - 1, b = last;;) {
372 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
373 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
374 if(b <= a) {
break; }
379 if(first < a) { *first = ~*first; }
386 ss_mintrosort(
const unsigned char *T,
const int *PA,
387 int *first,
int *last,
389 #define STACK_SIZE SS_MISORT_STACKSIZE
390 struct {
int *a, *b, c;
int d; } stack[
STACK_SIZE];
391 const unsigned char *Td;
392 int *a, *b, *c, *d, *e, *f;
398 for(ssize = 0, limit = ss_ilg(last - first);;) {
401 #if 1 < SS_INSERTIONSORT_THRESHOLD
402 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
409 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
411 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
412 if((x = Td[PA[*a]]) != v) {
413 if(1 < (a - first)) {
break; }
418 if(Td[PA[*first] - 1] < v) {
419 first = ss_partition(PA, first, a, depth);
421 if((a - first) <= (last - a)) {
422 if(1 < (a - first)) {
424 last = a, depth += 1, limit = ss_ilg(a - first);
426 first = a, limit = -1;
430 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
431 first = a, limit = -1;
433 last = a, depth += 1, limit = ss_ilg(a - first);
440 a = ss_pivot(Td, PA, first, last);
445 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
446 if(((a = b) < last) && (x < v)) {
447 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
448 if(x == v) {
SWAP(*b, *a); ++a; }
451 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
452 if((b < (d = c)) && (x > v)) {
453 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
454 if(x == v) {
SWAP(*c, *d); --d; }
459 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
460 if(x == v) {
SWAP(*b, *a); ++a; }
462 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
463 if(x == v) {
SWAP(*c, *d); --d; }
470 if((s = a - first) > (t = b - a)) { s = t; }
471 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) {
SWAP(*e, *f); }
472 if((s = d - c) > (t = last - d - 1)) { s = t; }
473 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) {
SWAP(*e, *f); }
475 a = first + (b - a), c = last - (d - c);
476 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
478 if((a - first) <= (last - c)) {
479 if((last - c) <= (c - b)) {
483 }
else if((a - first) <= (c - b)) {
490 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
493 if((a - first) <= (c - b)) {
497 }
else if((last - c) <= (c - b)) {
504 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
509 if(Td[PA[*first] - 1] < v) {
510 first = ss_partition(PA, first, last, depth);
511 limit = ss_ilg(last - first);
524 #if SS_BLOCKSIZE != 0
528 ss_blockswap(
int *a,
int *b,
int n) {
530 for(; 0 < n; --n, ++a, ++b) {
531 t = *a, *a = *b, *b = t;
537 ss_rotate(
int *first,
int *middle,
int *last) {
540 l = middle - first, r = last - middle;
541 for(; (0 < l) && (0 < r);) {
542 if(l == r) { ss_blockswap(first, middle, l);
break; }
544 a = last - 1, b = middle - 1;
547 *a-- = *b, *b-- = *a;
551 if((r -= l + 1) <= l) {
break; }
552 a -= 1, b = middle - 1;
557 a = first, b = middle;
560 *a++ = *b, *b++ = *a;
564 if((l -= r + 1) <= r) {
break; }
578 ss_inplacemerge(
const unsigned char *T,
const int *PA,
579 int *first,
int *middle,
int *last,
588 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
589 else { x = 0; p = PA + *(last - 1); }
590 for(a = first, len = middle - first, half = len >> 1, r = -1;
592 len = half, half >>= 1) {
594 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
597 half -= (len & 1) ^ 1;
603 if(r == 0) { *a = ~*a; }
604 ss_rotate(a, middle, last);
607 if(first == middle) {
break; }
610 if(x != 0) {
while(*--last < 0) { } }
611 if(middle == last) {
break; }
621 ss_mergeforward(
const unsigned char *T,
const int *PA,
622 int *first,
int *middle,
int *last,
623 int *buf,
int depth) {
624 int *a, *b, *c, *bufend;
628 bufend = buf + (middle - first) - 1;
629 ss_blockswap(buf, first, middle - first);
631 for(t = *(a = first), b = buf, c = middle;;) {
632 r = ss_compare(T, PA + *b, PA + *c, depth);
636 if(bufend <= b) { *bufend = t;
return; }
641 *a++ = *c, *c++ = *a;
643 while(b < bufend) { *a++ = *b, *b++ = *a; }
652 if(bufend <= b) { *bufend = t;
return; }
657 *a++ = *c, *c++ = *a;
659 while(b < bufend) { *a++ = *b, *b++ = *a; }
671 ss_mergebackward(
const unsigned char *T,
const int *PA,
672 int *first,
int *middle,
int *last,
673 int *buf,
int depth) {
675 int *a, *b, *c, *bufend;
680 bufend = buf + (last - middle) - 1;
681 ss_blockswap(buf, middle, last - middle);
684 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
685 else { p1 = PA + *bufend; }
686 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
687 else { p2 = PA + *(middle - 1); }
688 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
689 r = ss_compare(T, p1, p2, depth);
691 if(x & 1) {
do { *a-- = *b, *b-- = *a; }
while(*b < 0); x ^= 1; }
693 if(b <= buf) { *buf = t;
break; }
695 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
696 else { p1 = PA + *b; }
698 if(x & 2) {
do { *a-- = *c, *c-- = *a; }
while(*c < 0); x ^= 2; }
699 *a-- = *c, *c-- = *a;
701 while(buf < b) { *a-- = *b, *b-- = *a; }
705 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
706 else { p2 = PA + *c; }
708 if(x & 1) {
do { *a-- = *b, *b-- = *a; }
while(*b < 0); x ^= 1; }
710 if(b <= buf) { *buf = t;
break; }
712 if(x & 2) {
do { *a-- = *c, *c-- = *a; }
while(*c < 0); x ^= 2; }
713 *a-- = *c, *c-- = *a;
715 while(buf < b) { *a-- = *b, *b-- = *a; }
719 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
720 else { p1 = PA + *b; }
721 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
722 else { p2 = PA + *c; }
730 ss_swapmerge(
const unsigned char *T,
const int *PA,
731 int *first,
int *middle,
int *last,
732 int *buf,
int bufsize,
int depth) {
733 #define STACK_SIZE SS_SMERGE_STACKSIZE
734 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
735 #define MERGE_CHECK(a, b, c)\
738 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
741 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
745 struct {
int *a, *b, *c;
int d; } stack[
STACK_SIZE];
746 int *l, *r, *lm, *rm;
751 for(check = 0, ssize = 0;;) {
752 if((last - middle) <= bufsize) {
753 if((first < middle) && (middle < last)) {
754 ss_mergebackward(T, PA, first, middle, last, buf, depth);
761 if((middle - first) <= bufsize) {
763 ss_mergeforward(T, PA, first, middle, last, buf, depth);
770 for(m = 0, len =
MIN(middle - first, last - middle), half = len >> 1;
772 len = half, half >>= 1) {
773 if(ss_compare(T, PA +
GETIDX(*(middle + m + half)),
774 PA +
GETIDX(*(middle - m - half - 1)), depth) < 0) {
776 half -= (len & 1) ^ 1;
781 lm = middle - m, rm = middle + m;
782 ss_blockswap(lm, middle, m);
783 l = r = middle, next = 0;
787 if(first < lm) {
for(; *--l < 0;) { } next |= 4; }
789 }
else if(first < lm) {
790 for(; *r < 0; ++r) { }
795 if((l - first) <= (last - r)) {
796 STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
797 middle = lm, last = l, check = (check & 3) | (next & 4);
799 if((next & 2) && (r == middle)) { next ^= 6; }
800 STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
801 first = r, middle = rm, check = (next & 3) | (check & 4);
804 if(ss_compare(T, PA +
GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
822 sssort(
const unsigned char *T,
const int *PA,
823 int *first,
int *last,
824 int *buf,
int bufsize,
825 int depth,
int n,
int lastsuffix) {
827 #if SS_BLOCKSIZE != 0
828 int *b, *middle, *curbuf;
829 int j, k, curbufsize, limit;
833 if(lastsuffix != 0) { ++first; }
835 #if SS_BLOCKSIZE == 0
836 ss_mintrosort(T, PA, first, last, depth);
838 if((bufsize < SS_BLOCKSIZE) &&
839 (bufsize < (last - first)) &&
840 (bufsize < (limit = ss_isqrt(last - first)))) {
842 buf = middle = last - limit, bufsize = limit;
844 middle = last, limit = 0;
846 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a +=
SS_BLOCKSIZE, ++i) {
847 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
848 ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
849 #elif 1 < SS_BLOCKSIZE
850 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
854 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
855 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
856 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
859 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
860 ss_mintrosort(T, PA, a, middle, depth);
861 #elif 1 < SS_BLOCKSIZE
862 ss_insertionsort(T, PA, a, middle, depth);
864 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
866 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
871 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
872 ss_mintrosort(T, PA, middle, last, depth);
873 #elif 1 < SS_BLOCKSIZE
874 ss_insertionsort(T, PA, middle, last, depth);
876 ss_inplacemerge(T, PA, first, middle, last, depth);
880 if(lastsuffix != 0) {
882 int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
883 for(a = first, i = *(first - 1);
884 (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
898 return (n & 0xffff0000) ?
900 24 + lg_table[(n >> 24) & 0xff] :
901 16 + lg_table[(n >> 16) & 0xff]) :
903 8 + lg_table[(n >> 8) & 0xff] :
904 0 + lg_table[(n >> 0) & 0xff]);
913 tr_insertionsort(
const int *ISAd,
int *first,
int *last) {
917 for(a = first + 1; a < last; ++a) {
918 for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
919 do { *(b + 1) = *b; }
while((first <= --b) && (*b < 0));
920 if(b < first) {
break; }
922 if(r == 0) { *b = ~*b; }
932 tr_fixdown(
const int *ISAd,
int *SA,
int i,
int size) {
937 for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
938 d = ISAd[SA[k = j++]];
939 if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
940 if(d <= c) {
break; }
948 tr_heapsort(
const int *ISAd,
int *SA,
int size) {
953 if((size % 2) == 0) {
955 if(ISAd[SA[m / 2]] < ISAd[SA[m]]) {
SWAP(SA[m], SA[m / 2]); }
958 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
959 if((size % 2) == 0) {
SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
960 for(i = m - 1; 0 < i; --i) {
961 t = SA[0], SA[0] = SA[i];
962 tr_fixdown(ISAd, SA, 0, i);
973 tr_median3(
const int *ISAd,
int *v1,
int *v2,
int *v3) {
975 if(ISAd[*v1] > ISAd[*v2]) {
SWAP(v1, v2); }
976 if(ISAd[*v2] > ISAd[*v3]) {
977 if(ISAd[*v1] > ISAd[*v3]) {
return v1; }
986 tr_median5(
const int *ISAd,
987 int *v1,
int *v2,
int *v3,
int *v4,
int *v5) {
989 if(ISAd[*v2] > ISAd[*v3]) {
SWAP(v2, v3); }
990 if(ISAd[*v4] > ISAd[*v5]) {
SWAP(v4, v5); }
991 if(ISAd[*v2] > ISAd[*v4]) {
SWAP(v2, v4);
SWAP(v3, v5); }
992 if(ISAd[*v1] > ISAd[*v3]) {
SWAP(v1, v3); }
993 if(ISAd[*v1] > ISAd[*v4]) {
SWAP(v1, v4);
SWAP(v3, v5); }
994 if(ISAd[*v3] > ISAd[*v4]) {
return v4; }
1001 tr_pivot(
const int *ISAd,
int *first,
int *last) {
1006 middle = first + t / 2;
1010 return tr_median3(ISAd, first, middle, last - 1);
1013 return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1017 first = tr_median3(ISAd, first, first + t, first + (t << 1));
1018 middle = tr_median3(ISAd, middle - t, middle, middle + t);
1019 last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1020 return tr_median3(ISAd, first, middle, last);
1036 trbudget_init(
trbudget_t *budget,
int chance,
int incval) {
1043 trbudget_check(
trbudget_t *budget,
int size) {
1044 if(size <= budget->remain) { budget->
remain -= size;
return 1; }
1045 if(budget->
chance == 0) { budget->
count += size;
return 0; }
1056 tr_partition(
const int *ISAd,
1057 int *first,
int *middle,
int *last,
1058 int **pa,
int **pb,
int v) {
1059 int *a, *b, *c, *d, *e, *f;
1063 for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1064 if(((a = b) < last) && (x < v)) {
1065 for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1066 if(x == v) {
SWAP(*b, *a); ++a; }
1069 for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1070 if((b < (d = c)) && (x > v)) {
1071 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1072 if(x == v) {
SWAP(*c, *d); --d; }
1077 for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1078 if(x == v) {
SWAP(*b, *a); ++a; }
1080 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1081 if(x == v) {
SWAP(*c, *d); --d; }
1087 if((s = a - first) > (t = b - a)) { s = t; }
1088 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) {
SWAP(*e, *f); }
1089 if((s = d - c) > (t = last - d - 1)) { s = t; }
1090 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) {
SWAP(*e, *f); }
1091 first += (b - a), last -= (d - c);
1093 *pa = first, *pb = last;
1098 tr_copy(
int *ISA,
const int *SA,
1099 int *first,
int *a,
int *b,
int *last,
1107 for(c = first, d = a - 1; c <= d; ++c) {
1108 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1113 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1114 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1123 tr_partialcopy(
int *ISA,
const int *SA,
1124 int *first,
int *a,
int *b,
int *last,
1128 int rank, lastrank, newrank = -1;
1132 for(c = first, d = a - 1; c <= d; ++c) {
1133 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1135 rank = ISA[s + depth];
1136 if(lastrank != rank) { lastrank =
rank; newrank = d - SA; }
1142 for(e = d; first <= e; --e) {
1144 if(lastrank != rank) { lastrank =
rank; newrank = e - SA; }
1145 if(newrank != rank) { ISA[*e] = newrank; }
1149 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1150 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1152 rank = ISA[s + depth];
1153 if(lastrank != rank) { lastrank =
rank; newrank = d - SA; }
1161 tr_introsort(
int *ISA,
const int *ISAd,
1162 int *SA,
int *first,
int *last,
1164 #define STACK_SIZE TR_STACKSIZE
1165 struct {
const int *a;
int *b, *c;
int d, e; }stack[
STACK_SIZE];
1169 int incr = ISAd - ISA;
1171 int ssize, trlink = -1;
1173 for(ssize = 0, limit = tr_ilg(last - first);;) {
1178 tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1182 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1185 for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1191 STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1194 if((a - first) <= (last - b)) {
1195 if(1 < (a - first)) {
1196 STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1197 last = a, limit = tr_ilg(a - first);
1198 }
else if(1 < (last - b)) {
1199 first = b, limit = tr_ilg(last - b);
1201 STACK_POP5(ISAd, first, last, limit, trlink);
1204 if(1 < (last - b)) {
1205 STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1206 first = b, limit = tr_ilg(last - b);
1207 }
else if(1 < (a - first)) {
1208 last = a, limit = tr_ilg(a - first);
1210 STACK_POP5(ISAd, first, last, limit, trlink);
1213 }
else if(limit == -2) {
1215 a = stack[--ssize].b, b = stack[ssize].c;
1216 if(stack[ssize].d == 0) {
1217 tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1219 if(0 <= trlink) { stack[trlink].d = -1; }
1220 tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1222 STACK_POP5(ISAd, first, last, limit, trlink);
1227 do { ISA[*a] = a - SA; }
while((++a < last) && (0 <= *a));
1231 a = first;
do { *a = ~*a; }
while(*++a < 0);
1232 next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1233 if(++a < last) {
for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1236 if(trbudget_check(budget, a - first)) {
1237 if((a - first) <= (last - a)) {
1239 ISAd += incr, last = a, limit =
next;
1241 if(1 < (last - a)) {
1243 first = a, limit = -3;
1245 ISAd += incr, last = a, limit =
next;
1249 if(0 <= trlink) { stack[trlink].d = -1; }
1250 if(1 < (last - a)) {
1251 first = a, limit = -3;
1253 STACK_POP5(ISAd, first, last, limit, trlink);
1257 STACK_POP5(ISAd, first, last, limit, trlink);
1264 tr_insertionsort(ISAd, first, last);
1270 tr_heapsort(ISAd, first, last - first);
1271 for(a = last - 1; first < a; a = b) {
1272 for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1279 a = tr_pivot(ISAd, first, last);
1284 tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1285 if((last - first) != (b - a)) {
1286 next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1289 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1290 if(b < last) {
for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1293 if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1294 if((a - first) <= (last - b)) {
1295 if((last - b) <= (b - a)) {
1296 if(1 < (a - first)) {
1300 }
else if(1 < (last - b)) {
1304 ISAd += incr, first = a, last = b, limit =
next;
1306 }
else if((a - first) <= (b - a)) {
1307 if(1 < (a - first)) {
1313 ISAd += incr, first = a, last = b, limit =
next;
1318 ISAd += incr, first = a, last = b, limit =
next;
1321 if((a - first) <= (b - a)) {
1322 if(1 < (last - b)) {
1326 }
else if(1 < (a - first)) {
1330 ISAd += incr, first = a, last = b, limit =
next;
1332 }
else if((last - b) <= (b - a)) {
1333 if(1 < (last - b)) {
1339 ISAd += incr, first = a, last = b, limit =
next;
1344 ISAd += incr, first = a, last = b, limit =
next;
1348 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1349 if((a - first) <= (last - b)) {
1350 if(1 < (a - first)) {
1353 }
else if(1 < (last - b)) {
1356 STACK_POP5(ISAd, first, last, limit, trlink);
1359 if(1 < (last - b)) {
1362 }
else if(1 < (a - first)) {
1365 STACK_POP5(ISAd, first, last, limit, trlink);
1370 if(trbudget_check(budget, last - first)) {
1371 limit = tr_ilg(last - first), ISAd += incr;
1373 if(0 <= trlink) { stack[trlink].d = -1; }
1374 STACK_POP5(ISAd, first, last, limit, trlink);
1388 trsort(
int *ISA,
int *SA,
int n,
int depth) {
1392 int t,
skip, unsorted;
1394 trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1396 for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1401 if((t = *first) < 0) { first -= t; skip += t; }
1403 if(skip != 0) { *(first +
skip) = skip; skip = 0; }
1404 last = SA + ISA[t] + 1;
1405 if(1 < (last - first)) {
1407 tr_introsort(ISA, ISAd, SA, first, last, &budget);
1408 if(budget.
count != 0) { unsorted += budget.
count; }
1409 else { skip = first - last; }
1410 }
else if((last - first) == 1) {
1415 }
while(first < (SA + n));
1416 if(skip != 0) { *(first +
skip) = skip; }
1417 if(unsorted == 0) {
break; }
1427 sort_typeBstar(
const unsigned char *T,
int *SA,
1428 int *bucket_A,
int *bucket_B,
1430 int *PAb, *ISAb, *buf;
1435 int i, j, k, t, m, bufsize;
1449 for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1451 do { ++
BUCKET_A(c1 = c0); }
while((0 <= --i) && ((c0 = T[i]) >= c1));
1457 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1483 PAb = SA + n - m; ISAb = SA + m;
1484 for(i = m - 2; 0 <= i; --i) {
1485 t = PAb[i], c0 = T[t], c1 = T[t + 1];
1488 t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1494 buf = SA + m, bufsize = (n - (2 * m)) / tmp;
1495 c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1496 #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
1499 curbuf = buf + tmp * bufsize;
1502 #pragma omp critical(sssort_lock)
1509 d1 = ALPHABET_SIZE - 1;
1510 if(--d0 < 0) {
break; }
1512 }
while(((l - k) <= 1) && (0 < (l = k)));
1513 c0 = d0, c1 = d1, j = k;
1516 if(l == 0) {
break; }
1517 sssort(T, PAb, SA + k, SA + l,
1518 curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1522 buf = SA + m, bufsize = n - (2 * m);
1523 for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1524 for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1527 sssort(T, PAb, SA + i, SA + j,
1528 buf, bufsize, 2, n, *(SA + i) == (m - 1));
1535 for(i = m - 1; 0 <= i; --i) {
1538 do { ISAb[SA[i]] = i; }
while((0 <= --i) && (0 <= SA[i]));
1540 if(i <= 0) {
break; }
1543 do { ISAb[SA[i] = ~SA[i]] = j; }
while(SA[--i] < 0);
1548 trsort(ISAb, SA, m, 1);
1551 for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1552 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1555 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1556 SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1561 BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n;
1562 for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1564 for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1571 --i, --k) { SA[i] = SA[k]; }
1584 construct_SA(
const unsigned char *T,
int *SA,
1585 int *bucket_A,
int *bucket_B,
1594 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1597 j = SA +
BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1602 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1603 assert(T[s - 1] <= T[s]);
1606 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1608 if(0 <= c2) {
BUCKET_B(c2, c1) = k - SA; }
1614 assert(((s == 0) && (T[s] == c1)) || (s < 0));
1624 *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1626 for(i = SA, j = SA + n; i < j; ++i) {
1628 assert(T[s - 1] >= T[s]);
1630 if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1648 construct_BWT(
const unsigned char *T,
int *SA,
1649 int *bucket_A,
int *bucket_B,
1651 int *i, *j, *k, *orig;
1658 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1661 j = SA +
BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1666 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1667 assert(T[s - 1] <= T[s]);
1670 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1672 if(0 <= c2) {
BUCKET_B(c2, c1) = k - SA; }
1691 *k++ = (T[n - 2] < c2) ? ~((
int)T[n - 2]) : (n - 1);
1693 for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1695 assert(T[s - 1] >= T[s]);
1698 if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1722 int *bucket_A, *bucket_B;
1727 if((T == NULL) || (SA == NULL) || (n < 0)) {
return -1; }
1728 else if(n == 0) {
return 0; }
1729 else if(n == 1) { SA[0] = 0;
return 0; }
1730 else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1;
return 0; }
1732 bucket_A = (
int *)malloc(BUCKET_A_SIZE *
sizeof(
int));
1733 bucket_B = (
int *)malloc(BUCKET_B_SIZE *
sizeof(
int));
1736 if((bucket_A != NULL) && (bucket_B != NULL)) {
1737 m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
1738 construct_SA(T, SA, bucket_A, bucket_B, n, m);
1750 divbwt(
const unsigned char *T,
unsigned char *U,
int *A,
int n) {
1752 int *bucket_A, *bucket_B;
1756 if((T == NULL) || (U == NULL) || (n < 0)) {
return -1; }
1757 else if(n <= 1) {
if(n == 1) { U[0] = T[0]; }
return n; }
1759 if((B = A) == NULL) { B = (
int *)malloc((
size_t)(n + 1) *
sizeof(
int)); }
1760 bucket_A = (
int *)malloc(BUCKET_A_SIZE *
sizeof(
int));
1761 bucket_B = (
int *)malloc(BUCKET_B_SIZE *
sizeof(
int));
1764 if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1765 m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
1766 pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
1770 for(i = 0; i < pidx; ++i) { U[i + 1] = (
unsigned char)B[i]; }
1771 for(i += 1; i < n; ++i) { U[i] = (
unsigned char)B[i]; }
1779 if(A == NULL) { free(B); }