29 #ifndef _DIVSUFSORTXX_H_
30 #define _DIVSUFSORTXX_H_
36 namespace divsufsortxx {
41 template<
typename ISAIterator_type,
typename SAIterator_type>
44 SAIterator_type first, SAIterator_type last) {
45 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
46 typedef typename std::iterator_traits<ISAIterator_type>::value_type value_type;
51 for(a = first + 1; a < last; ++a) {
52 for(v = ISAd[t = *a], b = a; v < (x = ISAd[*(b - 1)]);) {
53 do { *b = *(b - 1); }
while((first < --b) && (*(b - 1) < 0));
54 if(b <= first) {
break; }
56 if(v == x) { *(b - 1) = ~*(b - 1); }
66 template<
typename StringIterator_type,
typename SAIterator_type,
typename pos_type>
68 fixdown(
const StringIterator_type Td, SAIterator_type SA, pos_type i, pos_type size) {
69 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
73 for(t = SA[i], c = Td[t]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
75 if(d < (e = Td[SA[j]])) { k = j; d = e; }
82 template<
typename StringIterator_type,
typename SAIterator_type>
84 heapsort(
const StringIterator_type Td, SAIterator_type SA,
85 typename std::iterator_traits<SAIterator_type>::value_type size) {
86 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
93 if(Td[SA[m / 2]] < Td[SA[m]]) {
98 for(i = m / 2 - 1; 0 <= i; --i) {
102 if((size % 2) == 0) {
104 fixdown(Td, SA, pos_type(0), m);
107 for(i = m - 1; 0 < i; --i) {
110 fixdown(Td, SA, pos_type(0), i);
121 -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
122 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
123 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
124 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
125 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
126 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
127 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
128 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
129 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
130 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
131 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
132 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
133 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
134 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
135 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
136 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
139 template<
typename numeric_type>
143 while(255 < n) { lgsize += 8, n >>= 8; }
144 return lgsize + log2table[n];
147 template<
typename Iterator1_type,
typename Iterator2_type>
149 vecswap(Iterator1_type first1, Iterator1_type last1, Iterator2_type first2) {
150 for(; first1 != last1; ++first1, ++first2) {
160 template<
typename StringIterator_type,
typename SAIterator_type>
163 SAIterator_type v1, SAIterator_type v2, SAIterator_type v3) {
164 if(Td[*v1] > Td[*v2]) {
std::swap(v1, v2); }
165 if(Td[*v2] > Td[*v3]) {
166 if(Td[*v1] > Td[*v3]) {
return v1; }
173 template<
typename StringIterator_type,
typename SAIterator_type>
176 SAIterator_type v1, SAIterator_type v2, SAIterator_type v3,
177 SAIterator_type v4, SAIterator_type v5) {
178 if(Td[*v2] > Td[*v3]) {
std::swap(v2, v3); }
179 if(Td[*v4] > Td[*v5]) {
std::swap(v4, v5); }
181 if(Td[*v1] > Td[*v3]) {
std::swap(v1, v3); }
183 if(Td[*v3] > Td[*v4]) {
return v4; }
188 template<
typename StringIterator_type,
typename SAIterator_type>
191 SAIterator_type first, SAIterator_type last) {
192 typedef typename std::iterator_traits<SAIterator_type>::difference_type difference_type;
193 difference_type t = last - first;
194 SAIterator_type middle = first + t / 2;
198 return median3(Td, first, middle, last - 1);
202 first, first + t, middle,
203 last - 1 - t, last - 1);
208 median3(Td, first, first + t, first + (t << 1)),
209 median3(Td, middle - t, middle, middle + t),
210 median3(Td, last - 1 - (t << 1), last - 1 - t, last - 1));
214 template<
typename StringIterator_type,
typename SAIterator_type>
217 SAIterator_type first1, SAIterator_type first2, SAIterator_type last,
218 SAIterator_type &mfirst, SAIterator_type &mlast,
219 const typename std::iterator_traits<StringIterator_type>::value_type &v) {
220 SAIterator_type a, b, c, d;
221 typename std::iterator_traits<StringIterator_type>::value_type x = 0;
223 for(b = first2; (b < last) && ((x = Td[*b]) == v); ++b) { }
224 if(((a = b) < last) && (x < v)) {
225 for(; (++b < last) && ((x = Td[*b]) <= v);) {
229 for(c = last; (b < --c) && ((x = Td[*c]) == v);) { }
230 if((b < (d = c)) && (x > v)) {
231 for(; (b < --c) && ((x = Td[*c]) >= v);) {
237 for(; (++b < c) && ((x = Td[*b]) <= v);) {
240 for(; (b < --c) && ((x = Td[*c]) >= v);) {
248 mfirst = first1 + (b - a), mlast = last - (d + 1 - b);
251 mfirst = first1, mlast = last;
258 template<
typename a_type,
typename b_type,
typename c_type>
266 template<
typename a_type,
typename b_type,
typename c_type,
typename d_type>
277 #define STACK_POP3(_a, _b, _c)\
279 if(stack.empty()) { return; }\
280 stackinfo_type tempinfo = stack.top();\
281 (_a) = tempinfo.m_a, (_b) = tempinfo.m_b, (_c) = tempinfo.m_c;\
284 #define STACK_POP4(_a, _b, _c, _d)\
286 if(stack.empty()) { return; }\
287 stackinfo_type tempinfo = stack.top();\
288 (_a) = tempinfo.m_a, (_b) = tempinfo.m_b, (_c) = tempinfo.m_c, (_d) = tempinfo.m_d;\
291 #define STACK_PUSH3(_a, _b, _c)\
292 stack.push(stackinfo_type((_a), (_b), (_c)))
293 #define STACK_PUSH4(_a, _b, _c, _d)\
294 stack.push(stackinfo_type((_a), (_b), (_c), (_d)))
297 namespace substring {
299 template<
typename StringIterator_type,
typename SAIterator_type>
302 const SAIterator_type p1,
const SAIterator_type p2,
303 typename std::iterator_traits<SAIterator_type>::value_type depth) {
304 StringIterator_type U1 = T + depth + *p1,
305 U2 = T + depth + *p2,
306 U1n = T + *(p1 + 1) + 2,
307 U2n = T + *(p2 + 1) + 2;
308 for(; (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); ++U1, ++U2) { }
311 (U2 < U2n ? (*U2 < *U1) * 2 - 1 : 1) :
316 template<
typename StringIterator_type,
typename SAIterator_type>
319 const SAIterator_type p1,
const SAIterator_type p2,
320 typename std::iterator_traits<SAIterator_type>::value_type depth,
321 typename std::iterator_traits<SAIterator_type>::value_type size) {
322 StringIterator_type U1 = T + depth + *p1,
323 U2 = T + depth + *p2,
325 U2n = T + *(p2 + 1) + 2;
326 for(; (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); ++U1, ++U2) { }
329 (U2 < U2n ? (*U2 < *U1) * 2 - 1 : 1) :
334 template<
typename StringIterator_type,
typename SAIterator_type>
337 SAIterator_type first, SAIterator_type last,
338 typename std::iterator_traits<SAIterator_type>::value_type depth) {
339 SAIterator_type a, b;
340 typename std::iterator_traits<SAIterator_type>::value_type t;
343 for(a = last - 1; first < a; --a) {
344 for(t = *(a - 1), b = a; 0 < (r =
compare(T, PA + t, PA + *b, depth));) {
345 do { *(b - 1) = *b; }
while((++b < last) && (*b < 0));
346 if(last <= b) {
break; }
348 if(r == 0) { *b = ~*b; }
353 template<
typename StringIterator_type,
typename PAIterator_type>
356 StringIterator_type m_Td;
357 const PAIterator_type m_PA;
359 typedef typename std::iterator_traits<StringIterator_type>::value_type
value_type;
372 return m_Td[m_PA[i]];
381 template<
typename SAIterator_type>
384 SAIterator_type first, SAIterator_type last,
385 typename std::iterator_traits<SAIterator_type>::value_type depth) {
386 SAIterator_type a, b;
387 for(a = first, b = last - 1;; ++a, --b) {
388 for(; (a <= b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1)); ++a) { *a = ~*a; }
389 for(; (a < b) && ((PA[*b] + depth) < (PA[*b + 1] + 1)); --b) { }
390 if(b <= a) {
break; }
394 if(first < a) { *first = ~*first; }
398 template<
typename stack_type,
typename StringIterator_type,
typename SAIterator_type>
400 mintrosort(stack_type &stack,
const StringIterator_type T,
const SAIterator_type PA,
401 SAIterator_type first, SAIterator_type last,
402 typename std::iterator_traits<SAIterator_type>::value_type depth) {
403 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
404 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
406 typedef typename stack_type::value_type stackinfo_type;
408 SAIterator_type a, b, c;
414 if((last - first) <= 8) {
415 if(1 < (last - first)) {
insertionsort(T, PA, first, last, depth); }
420 const StringIterator_type Td = T + depth;
421 if(limit-- == 0) {
helper::heapsort(wrapper_type(Td, PA), first, value_type( last - first )); }
423 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
424 if((x = Td[PA[*a]]) != v) {
425 if(1 < (a - first)) {
break; }
430 if(Td[PA[*first] - 1] < v) {
433 if((a - first) <= (last - a)) {
434 if(1 < (a - first)) {
436 last = a, depth += 1, limit =
helper::lg(a - first);
438 first = a, limit = -1;
443 first = a, limit = -1;
445 last = a, depth += 1, limit =
helper::lg(a - first);
454 if(
helper::partition(wrapper_type(Td, PA), first, first + 1, last, a, c, Td[PA[*first]]) !=
false) {
455 b = (Td[PA[*a]] <= Td[PA[*a] - 1]) ? a :
partition(PA, a, c, depth);
457 if((a - first) <= (last - c)) {
458 if((last - c) <= (c - b)) {
462 }
else if((a - first) <= (c - b)) {
469 first = b, last = c, depth += 1, limit =
helper::lg(c - b);
472 if((a - first) <= (c - b)) {
476 }
else if((last - c) <= (c - b)) {
483 first = b, last = c, depth += 1, limit =
helper::lg(c - b);
488 if(Td[PA[*first] - 1] < Td[PA[*first]]) {
489 first =
partition(PA, first, last, depth);
498 template<
typename StringIterator_type,
typename SAIterator_type,
typename BufIterator_type>
501 const SAIterator_type PA, BufIterator_type buf,
502 SAIterator_type first, SAIterator_type middle, SAIterator_type last,
503 typename std::iterator_traits<SAIterator_type>::value_type depth) {
504 SAIterator_type i, k;
505 BufIterator_type j, bufend;
506 typename std::iterator_traits<SAIterator_type>::value_type t;
509 bufend = buf + (middle - first);
512 for(t = *first, i = first, j = buf, k = middle;;) {
513 r =
compare(T, PA + *j, PA + *k, depth);
516 *i++ = *j; *j++ = *i;
517 if(bufend <= j) { *(bufend - 1) = t;
return; }
521 *i++ = *k; *k++ = *i;
523 do { *i++ = *j; *j++ = *i; }
while(j < bufend);
531 *i++ = *j; *j++ = *i;
532 if(bufend <= j) { *(bufend - 1) = t;
return; }
536 *i++ = *k; *k++ = *i;
538 do { *i++ = *j; *j++ = *i; }
while(j < bufend);
548 template<
typename StringIterator_type,
typename SAIterator_type,
typename BufIterator_type>
551 const SAIterator_type PA, BufIterator_type buf,
552 SAIterator_type first, SAIterator_type middle, SAIterator_type last,
553 typename std::iterator_traits<SAIterator_type>::value_type depth) {
554 SAIterator_type p1, p2;
555 SAIterator_type i, k;
556 BufIterator_type j, bufend;
557 typename std::iterator_traits<SAIterator_type>::value_type t;
560 bufend = buf + (last - middle);
564 if(*(bufend - 1) < 0) { x |= 1; p1 = PA + ~*(bufend - 1); }
565 else { p1 = PA + *(bufend - 1); }
566 if(*(middle - 1) < 0) { x |= 2; p2 = PA + ~*(middle - 1); }
567 else { p2 = PA + *(middle - 1); }
568 for(t = *(last - 1), i = last - 1, j = bufend - 1, k = middle - 1;;) {
572 if(x & 1) {
do { *i-- = *j; *j-- = *i; }
while(*j < 0); x ^= 1; }
574 if(j <= buf) { *buf = t;
return; }
576 if(*--j < 0) { x |= 1; p1 = PA + ~*j; }
577 else { p1 = PA + *j; }
579 if(x & 2) {
do { *i-- = *k; *k-- = *i; }
while(*k < 0); x ^= 2; }
582 while(buf < j) { *i-- = *j; *j-- = *i; }
587 if(*--k < 0) { x |= 2; p2 = PA + ~*k; }
588 else { p2 = PA + *k; }
590 if(x & 1) {
do { *i-- = *j; *j-- = *i; }
while(*j < 0); x ^= 1; }
592 if(j <= buf) { *buf = t;
return; }
595 if(x & 2) {
do { *i-- = *k; *k-- = *i; }
while(*k < 0); x ^= 2; }
598 while(buf < j) { *i-- = *j; *j-- = *i; }
604 if(*j < 0) { x |= 1; p1 = PA + ~*j; }
605 else { p1 = PA + *j; }
606 if(*k < 0) { x |= 2; p2 = PA + ~*k; }
607 else { p2 = PA + *k; }
613 template<
typename stack_type,
typename StringIterator_type,
typename SAIterator_type,
typename BufIterator_type>
615 merge(stack_type &stack,
const StringIterator_type T,
const SAIterator_type PA,
616 SAIterator_type first, SAIterator_type middle, SAIterator_type last,
617 BufIterator_type buf,
typename std::iterator_traits<SAIterator_type>::value_type bufsize,
618 typename std::iterator_traits<SAIterator_type>::value_type depth) {
619 typedef typename std::iterator_traits<SAIterator_type>::difference_type difference_type;
620 typedef typename stack_type::value_type stackinfo_type;
621 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
622 #define MERGE_CHECK(a)\
625 (compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0)) {\
630 SAIterator_type i, j;
631 difference_type m, len, half;
636 if((last - middle) <= bufsize) {
637 if((first < middle) && (middle < last)) {
646 if((middle - first) <= bufsize) {
656 for(m = 0, len =
std::min(middle - first, last - middle), half = len >> 1;
658 len = half, half >>= 1) {
660 PA +
GETIDX(*(middle - m - half - 1)), depth) < 0) {
662 half -= ((len & 1) == 0);
668 i = j = middle, next = 0;
669 if((middle + m) < last) {
670 if(*(middle + m) < 0) {
671 for(; *(i - 1) < 0; --i) { }
672 *(middle + m) = ~*(middle + m);
674 for(j = middle; *j < 0; ++j) { }
677 if((i - first) <= (last - j)) {
678 STACK_PUSH4(j, middle + m, last, (check & 2) | (next & 1));
679 middle -= m, last = i, check = (check & 1);
681 if((i == middle) && (middle == j)) { next <<= 1; }
682 STACK_PUSH4(first, middle - m, i, (check & 1) | (next & 2));
683 first = j, middle += m, check = (check & 2) | (next & 1);
696 template<
typename StringIterator_type,
typename SAIterator_type,
typename BufIterator_type>
698 sort(
const StringIterator_type T,
const SAIterator_type PA,
699 SAIterator_type first, SAIterator_type last,
700 BufIterator_type buf,
typename std::iterator_traits<SAIterator_type>::value_type bufsize,
701 typename std::iterator_traits<SAIterator_type>::value_type depth,
702 typename std::iterator_traits<SAIterator_type>::value_type size,
703 bool lastsuffix,
int blocksize = 1024) {
704 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
705 std::stack<helper::stackinfo4<SAIterator_type, SAIterator_type, pos_type, int> > stack1;
706 std::stack<helper::stackinfo4<SAIterator_type, SAIterator_type, SAIterator_type, int> > stack2;
708 SAIterator_type a, b;
709 SAIterator_type curbuf;
710 pos_type i, j, k, curbufsize;
712 if(lastsuffix !=
false) { ++first; }
713 for(a = first, i = 0; blocksize < (last - a); a += blocksize, ++i) {
714 mintrosort(stack1, T, PA, a, a + blocksize, depth);
715 curbuf = a + blocksize;
716 curbufsize = pos_type( last - (a + blocksize) );
717 if(bufsize <= curbufsize) {
718 for(b = a, k = blocksize, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
719 merge(stack2, T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
722 for(b = a, k = blocksize, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
723 merge(stack2, T, PA, b - k, b, b + k, buf, bufsize, depth);
728 for(k = blocksize; i != 0; k <<= 1, i >>= 1) {
730 merge(stack2, T, PA, a - k, a, last, buf, bufsize, depth);
735 if(lastsuffix !=
false) {
737 for(a = first, i = *(first - 1);
738 (a < last) && ((*a < 0) || (0 <
compare_last(T, PA + i, PA + *a, depth, size)));
753 template<
typename ISAIterator_type,
typename SAIterator_type>
756 SAIterator_type first, SAIterator_type last) {
757 typedef typename std::iterator_traits<ISAIterator_type>::value_type value_type;
758 typedef typename std::iterator_traits<SAIterator_type>::value_type sa_type;
759 SAIterator_type a, b;
762 for(a = first; a < last; ++a) {
765 do { ISA[*a] = sa_type( a - SA ); }
while((++a < last) && (0 <= *a));
766 *b = sa_type( b - a );
767 if(last <= a) {
break; }
770 do { *a = ~*a; }
while(*++a < 0);
771 t = value_type( a - SA );
772 do { ISA[*b] = t; }
while(++b <= a);
776 template<
typename stack_type,
typename ISAIterator_type,
typename SAIterator_type>
778 introsort(stack_type &stack, ISAIterator_type ISA,
const ISAIterator_type ISAd,
779 const SAIterator_type SA, SAIterator_type first, SAIterator_type last) {
780 typedef typename std::iterator_traits<ISAIterator_type>::value_type value_type;
781 typedef typename std::iterator_traits<SAIterator_type>::value_type sa_type;
782 typedef typename stack_type::value_type stackinfo_type;
784 SAIterator_type a, b, c;
789 if((last - first) <= 8) {
790 if(1 < (last - first)) {
793 }
else if((last - first) == 1) { *first = -1; }
800 for(a = last - 1, v = ISAd[*a]; first < a;) {
801 if((x = ISAd[*--a]) == v) { *a = ~*a; }
811 if(
helper::partition(ISAd, first, first + 1, last, a, b, ISAd[*first]) !=
false) {
814 for(c = first, v = value_type( a - SA - 1 ); c < a; ++c) { ISA[*c] = v; }
815 if(b < last) {
for(c = a, v = value_type( b - SA - 1 ); c < b; ++c) { ISA[*c] = v; } }
816 if((b - a) == 1) { *a = - 1; }
818 if((a - first) <= (last - b)) {
839 template<
typename ISAIterator_type,
typename SAIterator_type>
841 sort(ISAIterator_type ISA, SAIterator_type first, SAIterator_type last) {
842 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
843 std::stack<helper::stackinfo3<SAIterator_type, SAIterator_type, int> > stack;
844 SAIterator_type a, b;
845 pos_type t,
skip, depth, size;
847 for(size = pos_type( last - first ), depth = 1; -size < *first; depth *= 2) {
850 if((t = *a) < 0) { a -= t; skip += t; }
852 if(skip != 0) { *(a +
skip) = skip; skip = 0; }
853 b = first + ISA[t] + 1;
854 introsort(stack, ISA, ISA + depth, first, a, b);
858 if(skip != 0) { *(a +
skip) = skip; }
866 namespace tandemrepeat {
868 template<
typename stack_type,
typename ISAIterator_type,
typename SAIterator_type,
typename pos_type>
870 introsort(stack_type &stack, ISAIterator_type ISA, ISAIterator_type ISAd,
871 const SAIterator_type SA, SAIterator_type first, SAIterator_type last,
872 pos_type &budget,
int &chance, pos_type size) {
873 typedef typename std::iterator_traits<ISAIterator_type>::value_type value_type;
874 typedef typename stack_type::value_type stackinfo_type;
875 #define UPDATE_BUDGET(n)\
880 if(--chance == 0) { break; }\
884 SAIterator_type a, b, c, d, e;
894 helper::partition(ISAd - 1, first, first, last, a, b, value_type( last - SA - 1 ));
898 for(c = first, v = value_type( a - SA - 1 ); c < a; ++c) { ISA[*c] = v; }
901 for(c = a, v = value_type( b - SA - 1 ); c < b; ++c) { ISA[*c] = v; }
907 if((a - first) <= (last - b)) {
910 }
else { first = b; }
917 }
else if(limit == -2) {
919 stackinfo_type temp = stack.top(); stack.pop();
920 a = temp.m_b, b = temp.m_c;
921 t = value_type( ISAd - ISA );
922 v = value_type( b - SA - 1 );
923 for(c = first, d = a; c < d; ++c) {
924 if((0 <= (s = *c - t)) && (ISA[s] == v)) {
925 ISA[s] = value_type( d - SA );
929 for(c = last - 1, e = d, d = b; e < d; --c) {
930 if((0 <= (s = *c - t)) && (ISA[s] == v)) {
932 ISA[s] = value_type( d - SA );
940 do { ISA[*a] = value_type( a - SA ); }
while((++a < last) && (0 <= *a));
944 a = first;
do { *a = ~*a; }
while(*++a < 0); ++a;
946 for(c = first, v = value_type( a - SA - 1 ); c < a; ++c) { ISA[*c] = v; }
950 next = (ISA[*first] == ISAd[*first]) ? -1 :
helper::lg(a - first);
952 if((a - first) <= (last - a)) {
955 ISAd += 1, last = a, limit =
next;
957 first = a, limit = -3;
962 first = a, limit = -3;
964 ISAd += 1, last = a, limit =
next;
974 if((last - first) <= 8) {
975 if(1 < (last - first)) {
986 for(a = last - 1, v = ISAd[*a]; first < a;) {
987 if((x = ISAd[*--a]) == v) { *a = ~*a; }
996 if(
helper::partition(ISAd, first, first + 1, last, a, b, ISAd[*first]) !=
false) {
997 next = (ISA[*a] == ISAd[*a]) ? -1 :
helper::lg(b - a);
1000 for(c = first, v = value_type( a - SA - 1 ); c < a; ++c) { ISA[*c] = v; }
1001 if(b < last) {
for(c = a, v = value_type( b - SA - 1 ); c < b; ++c) { ISA[*c] = v; } }
1005 if((a - first) <= (last - b)) {
1006 if((last - b) <= (b - a)) {
1010 }
else if((a - first) <= (b - a)) {
1017 ISAd += 1, first = a, last = b, limit =
next;
1020 if((a - first) <= (b - a)) {
1024 }
else if((last - b) <= (b - a)) {
1031 ISAd += 1, first = a, last = b, limit =
next;
1035 limit = (ISA[*first] == ISAd[*first]) ? -1 : (limit + 1), ISAd += 1;
1040 for(; stack.empty() ==
false; stack.pop()) {
1041 stackinfo_type temp = stack.top();
1042 if(temp.m_d == -3) {
1043 first = temp.m_b, last = temp.m_c;
1044 for(a = first; a < last; ++a) {
1047 do { ISA[*a] = value_type( a - SA ); }
while((++a < last) && (0 <= *a));
1048 if(last <= a) {
break; }
1051 do { *a = ~*a; }
while(*++a < 0);
1052 v = value_type( a - SA );
1053 do { ISA[*b] = v; }
while(++b <= a);
1057 #undef UPDATE_BUDGET
1060 template<
typename ISAIterator_type,
typename SAIterator_type>
1062 sort(ISAIterator_type ISA, SAIterator_type first, SAIterator_type last) {
1063 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
1064 std::stack<helper::stackinfo4<ISAIterator_type, SAIterator_type, SAIterator_type, int> > stack;
1065 SAIterator_type a, b;
1066 pos_type t,
skip, size, budget;
1069 size = pos_type( last - first );
1070 if(-size < *first) {
1071 a = first, skip = 0, budget = size;
1076 if((t = *a) < 0) { a -= t; skip += t; }
1079 b = first + ISA[t] + 1;
1080 introsort(stack, ISA, ISA + 1, first, a, b, budget, chance, size);
1083 if(first < a) { *first = -pos_type(a - first); }
1104 static const int EXTRA_SPACE = 0;
1107 #define BUCKET_A(c0) bucket_A[(c0)]
1108 #define BUCKET_B(c0, c1) (bucket_B[((alphabetsize_type)(c1)) * alphabetsize + (alphabetsize_type)(c0)])
1109 #define BUCKET_BSTAR(c0, c1) (bucket_B[((alphabetsize_type)(c0)) * alphabetsize + (alphabetsize_type)(c1)])
1111 static const int MERGE_BUFSIZE = 256;
1114 template<
typename StringIterator_type,
typename SAIterator_type,
typename pos_type,
typename alphabet
size_type>
1117 pos_type *bucket_A, pos_type *bucket_B,
1118 pos_type n, pos_type SAsize, alphabetsize_type alphabetsize) {
1119 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
1120 pos_type i, j, k, t, m, bufsize;
1121 alphabetsize_type c0, c1;
1124 for(i = 0; i < alphabetsize; ++i) { bucket_A[i] = 0; }
1125 for(i = 0; i < alphabetsize * alphabetsize; ++i) { bucket_B[i] = 0; }
1130 for(i = n - 1, m = SAsize; 0 <= i;) {
1132 do { ++
BUCKET_A(T[i]); }
while((0 <= --i) && (T[i] >= T[i + 1]));
1138 for(--i; (0 <= i) && (T[i] <= T[i + 1]); --i) {
1146 for(c0 = 0, i = -1, j = 0; c0 < alphabetsize; ++c0) {
1150 for(c1 = c0 + 1; c1 < alphabetsize; ++c1) {
1159 SAIterator_type PAb = SA + SAsize - m, ISAb = SA + m;
1160 for(i = m - 2; 0 <= i; --i) {
1161 t = PAb[i], c0 = T[t], c1 = T[t + 1];
1164 t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1168 bufsize = SAsize - 2 * m;
1169 if(MERGE_BUFSIZE < bufsize) {
1170 SAIterator_type buf = SA + m;
1171 for(c0 = alphabetsize - 1, j = m; 0 < j; --c0) {
1172 for(c1 = alphabetsize - 1; c0 < c1; j = i, --c1) {
1175 substring::sort(T, PAb, SA + i, SA + j, buf, bufsize, 2, n, *(SA + i) == (m - 1));
1180 pos_type *lbuf = NULL;
1183 lbuf =
new pos_type[MERGE_BUFSIZE];
if(lbuf == NULL) {
throw; }
1184 for(c0 = alphabetsize - 1, j = m; 0 < j; --c0) {
1185 for(c1 = alphabetsize - 1; c0 < c1; j = i, --c1) {
1188 substring::sort(T, PAb, SA + i, SA + j, lbuf, MERGE_BUFSIZE, 2, n, *(SA + i) == (m - 1));
1196 if(err != 0) {
throw; }
1200 for(i = m - 1; 0 <= i; --i) {
1203 do { ISAb[SA[i]] = i; }
while((0 <= --i) && (0 <= SA[i]));
1205 if(i <= 0) {
break; }
1208 do { ISAb[SA[i] = ~SA[i]] = j; }
while(SA[--i] < 0);
1218 for(i = n - 1, j = m; 0 <= i;) {
1219 for(--i; (0 <= i) && (T[i] >= T[i + 1]); --i) { }
1222 for(--i; (0 <= i) && (T[i] <= T[i + 1]); --i) { }
1227 for(c0 = alphabetsize - 1, i = n - 1, k = m - 1; 0 <= c0; --c0) {
1228 for(c1 = alphabetsize - 1; c0 < c1; --c1) {
1235 --i, --k) { SA[i] = SA[k]; }
1239 if(c0 < (alphabetsize - 1)) {
1250 template<
typename StringIterator_type,
typename SAIterator_type,
typename pos_type,
typename alphabet
size_type>
1253 pos_type *bucket_A, pos_type *bucket_B,
1254 pos_type n, pos_type m, alphabetsize_type alphabetsize) {
1255 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
1256 SAIterator_type i, j, t = SA;
1258 alphabetsize_type c0, c1, c2;
1265 for(c1 = alphabetsize - 2; 0 <= c1; --c1) {
1271 if(0 <= (s = *--j)) {
1272 if((0 <= --s) && ((c0 = T[s]) <= c1)) {
1274 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1275 if(c2 == c0) { *--t = s; }
1277 if(0 <= c2) {
BUCKET_B(c2, c1) = t - SA; }
1278 *(t = SA +
BUCKET_B(c2 = c0, c1) - 1) = s;
1290 *(t = SA + (
BUCKET_A(c2 = T[n - 1]) + 1)) = n - 1;
1292 for(i = SA, j = SA + n; i < j; ++i) {
1294 if((0 <= --s) && ((c0 = T[s]) >= T[s + 1])) {
1295 if((0 < s) && (T[s - 1] < c0)) { s = ~s; }
1296 if(c0 == c2) { *++t = s; }
1299 *(t = SA + (
BUCKET_A(c2 = c0) + 1)) = s;
1311 template<
typename StringIterator_type,
typename SAIterator_type,
typename pos_type,
typename alphabet
size_type>
1314 pos_type *bucket_A, pos_type *bucket_B,
1315 pos_type n, pos_type m, alphabetsize_type alphabetsize) {
1316 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
1317 SAIterator_type i, j, t = SA, orig;
1319 alphabetsize_type c0, c1, c2;
1326 for(c1 = alphabetsize - 2; 0 <= c1; --c1) {
1332 if(0 <= (s = *--j)) {
1333 if((0 <= --s) && ((c0 = T[s]) <= c1)) {
1334 *j = ~((pos_type)c0);
1335 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1336 if(c0 == c2) { *--t = s; }
1338 if(0 <= c2) {
BUCKET_B(c2, c1) = pos_type( t - SA ); }
1339 *(t = SA +
BUCKET_B(c2 = c0, c1) - 1) = s;
1352 if(T[s - 1] < c0) { s = ~((pos_type)T[s - 1]); }
1353 *(t = SA + (
BUCKET_A(c2 = c0) + 1)) = s;
1355 for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1357 if((0 <= --s) && ((c0 = T[s]) >= T[s + 1])) {
1359 if((0 < s) && (T[s - 1] < c0)) { s = ~((pos_type)T[s - 1]); }
1360 if(c0 == c2) { *++t = s; }
1363 *(t = SA + (
BUCKET_A(c2 = c0) + 1)) = s;
1365 }
else if(s < 0) { orig = i; }
1395 template<
typename StringIterator_type,
typename SAIterator_type,
typename alphabet
size_type>
1397 constructSA(
const StringIterator_type T,
const StringIterator_type T_last,
1398 SAIterator_type SA, SAIterator_type SA_last,
1399 alphabetsize_type alphabetsize = 256) {
1400 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
1401 pos_type *bucket_A = NULL, *bucket_B = NULL;
1402 pos_type n = T_last - T, m = SA_last - SA;
1406 if((n < 0) || (m < n)) {
return -1; }
1407 else if(n == 0) {
return 0; }
1408 else if(n == 1) { *SA = 0;
return 0; }
1411 bucket_A =
new pos_type[alphabetsize];
if(bucket_A == NULL) {
throw; }
1412 bucket_B =
new pos_type[alphabetsize * alphabetsize];
if(bucket_B == NULL) {
throw; }
1425 template<
typename StringIterator_type,
typename SAIterator_type,
typename alphabet
size_type>
1428 typename std::iterator_traits<SAIterator_type>::value_type n,
1429 alphabetsize_type alphabetsize = 256) {
1430 return constructSA(T, T + n, SA, SA + n, alphabetsize);
1445 template<
typename StringIterator1_type,
typename StringIterator2_type,
1446 typename SAIterator_type,
typename alphabetsize_type>
1447 typename std::iterator_traits<SAIterator_type>::value_type
1448 constructBWT(
const StringIterator1_type T,
const StringIterator1_type T_last,
1449 StringIterator2_type U, StringIterator2_type U_last,
1450 SAIterator_type SA, SAIterator_type SA_last,
1451 alphabetsize_type alphabetsize = 256) {
1452 typedef typename std::iterator_traits<StringIterator2_type>::value_type value_type;
1453 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
1454 SAIterator_type piter, i, k;
1455 StringIterator2_type j;
1456 pos_type *bucket_A = NULL, *bucket_B = NULL;
1457 pos_type n = pos_type( T_last - T ), m = pos_type( SA_last - SA ), u = pos_type( U_last - U ), pidx = -1;
1460 if((n < pos_type(0)) || (m < n)) {
return -1; }
1461 else if(n == 0) {
return 0; }
1462 else if(n == 1) {
if(0 < u) { U[0] = T[0]; }
return 1; }
1465 bucket_A =
new pos_type[alphabetsize];
if(bucket_A == NULL) {
throw; }
1466 bucket_B =
new pos_type[alphabetsize * alphabetsize];
if(bucket_B == NULL) {
throw; }
1469 pidx = pos_type( piter - SA + 1 );
1471 U[pos_type(0)] = T[n - 1];
1473 for(i = SA, j = U + 1; i < piter; ++i, ++j) { *j = value_type(*i); }
1474 if(n <= u) {
for(i += 1; i < SA_last; ++i, ++j) { *j = value_type(*i); } }
1475 else {
for(i += 1, k = SA + u - 1; i < k; ++i, ++j) { *j = value_type(*i); } }
1477 for(i = SA, j = U + 1, k = SA + u - 1; i < k; ++i, ++j) { *j = value_type(*i); }
1490 template<
typename pos_type,
typename StringIterator1_type,
typename StringIterator2_type,
typename alphabet
size_type>
1492 constructBWT(
const StringIterator1_type T,
const StringIterator1_type T_last,
1493 StringIterator2_type U, StringIterator2_type U_last,
1494 alphabetsize_type alphabetsize = 256) {
1495 pos_type *SA = NULL;
1496 pos_type n = T_last - T, pidx = -1;
1499 if(n < 0) {
return -1; }
1500 else if(n == 0) {
return 0; }
1501 else if(n == 1) {
if(U < U_last) { U[pos_type(0)] = T[pos_type(0)]; }
return 1; }
1504 SA =
new pos_type[n];
if(SA == NULL) {
throw; }
1505 pidx =
constructBWT(T, T_last, U, U_last, SA, SA + n, alphabetsize);
1514 template<
typename pos_type,
typename StringIterator_type,
typename alphabet
size_type>
1517 alphabetsize_type alphabetsize = 256) {
1518 return constructBWT<pos_type>(T, T_last, T, T_last, alphabetsize);
1521 template<
typename StringIterator1_type,
typename StringIterator2_type,
1522 typename SAIterator_type,
typename alphabetsize_type>
1523 typename std::iterator_traits<SAIterator_type>::value_type
1524 constructBWT(
const StringIterator1_type T, StringIterator2_type U, SAIterator_type SA,
1525 typename std::iterator_traits<SAIterator_type>::value_type n,
1526 alphabetsize_type alphabetsize = 256) {
1527 return constructBWT(T, T + n, U, U + n, SA, SA + n, alphabetsize);
1530 template<
typename pos_type,
typename StringIterator1_type,
typename StringIterator2_type,
typename alphabet
size_type>
1533 pos_type n, alphabetsize_type alphabetsize = 256) {
1534 return constructBWT<pos_type>(T, T + n, U, U + n, alphabetsize);
1537 template<
typename pos_type,
typename StringIterator_type,
typename alphabet
size_type>
1539 constructBWT(StringIterator_type T, pos_type n, alphabetsize_type alphabetsize = 256) {
1540 return constructBWT<pos_type>(T, T + n, alphabetsize);