35 #ifdef __INTEL_COMPILER
36 #pragma warning(disable : 383 981 1418)
41 #pragma warning(disable : 4365)
44 namespace saisxx_private {
47 template<
typename string_type,
typename bucket_type,
typename index_type>
49 getCounts(
const string_type T, bucket_type C, index_type n, index_type k) {
51 for(i = 0; i < k; ++i) { C[i] = 0; }
52 for(i = 0; i < n; ++i) { ++C[T[i]]; }
54 template<
typename bucketC_type,
typename bucketB_type,
typename index_type>
56 getBuckets(
const bucketC_type C, bucketB_type B, index_type k,
bool end) {
57 index_type i, sum = 0;
58 if(end !=
false) {
for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum; } }
59 else {
for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum - C[i]; } }
62 template<
typename string_type,
typename sarray_type,
63 typename bucketC_type,
typename bucketB_type,
typename index_type>
65 LMSsort1(string_type T, sarray_type SA,
66 bucketC_type C, bucketB_type B,
67 index_type n, index_type k,
bool recount) {
68 typedef typename std::iterator_traits<string_type>::value_type char_type;
69 typedef typename std::iterator_traits<bucketB_type>::value_type bucket_type;
75 if(recount !=
false) { getCounts(T, C, n, k); }
76 getBuckets(C, B, k,
false);
78 b = SA + B[c1 = T[j]];
80 *b++ = (T[j] < c1) ? ~j : j;
81 for(i = 0; i < n; ++i) {
83 assert(T[j] >= T[j + 1]);
84 if((c0 = T[j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
87 *b++ = (T[j] < c1) ? ~j : j;
94 if(recount !=
false) { getCounts(T, C, n, k); }
95 getBuckets(C, B, k,
true);
96 for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
98 assert(T[j] <= T[j + 1]);
99 if((c0 = T[j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
100 assert((b - SA) <= i);
102 *--b = (T[j] > c1) ? ~(j + 1) : j;
107 template<
typename string_type,
typename sarray_type,
typename index_type>
109 LMSpostproc1(string_type T, sarray_type SA, index_type n, index_type m) {
110 typedef typename std::iterator_traits<string_type>::value_type char_type;
111 index_type i, j, p, q, plen, qlen, name;
118 for(i = 0; (p = SA[i]) < 0; ++i) { SA[i] = ~p; assert((i + 1) < n); }
120 for(j = i, ++i;; ++i) {
122 if((p = SA[i]) < 0) {
123 SA[j++] = ~p; SA[i] = 0;
124 if(j == m) {
break; }
130 i = n - 1; j = n - 1; c0 = T[n - 1];
131 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) >= c1));
133 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) <= c1));
135 SA[m + ((i + 1) >> 1)] = j - i; j = i + 1;
136 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) >= c1));
141 for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
142 p = SA[i], plen = SA[m + (p >> 1)], diff =
true;
143 if((plen == qlen) && ((q + plen) < n)) {
144 for(j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j) { }
145 if(j == plen) { diff =
false; }
147 if(diff !=
false) { ++name, q = p, qlen = plen; }
148 SA[m + (p >> 1)] = name;
153 template<
typename string_type,
typename sarray_type,
154 typename bucketC_type,
typename bucketB_type,
typename bucketD_type,
157 LMSsort2(string_type T, sarray_type SA,
158 bucketC_type C, bucketB_type B, bucketD_type D,
159 index_type n, index_type k) {
160 typedef typename std::iterator_traits<string_type>::value_type char_type;
161 typedef typename std::iterator_traits<bucketB_type>::value_type bucket_type;
163 index_type i, j, t, d;
167 getBuckets(C, B, k,
false);
169 b = SA + B[c1 = T[j]];
173 *b++ = (t & 1) ? ~j : j;
174 for(i = 0, d = 0; i < n; ++i) {
175 if(0 < (j = SA[i])) {
176 if(n <= j) { d += 1; j -= n; }
177 assert(T[j] >= T[j + 1]);
178 if((c0 = T[j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
179 assert(i < (b - SA));
181 t = c0; t = (t << 1) | (T[j] < c1);
182 if(D[t] != d) { j += n; D[t] = d; }
183 *b++ = (t & 1) ? ~j : j;
189 for(i = n - 1; 0 <= i; --i) {
193 for(j = i - 1; SA[j] < n; --j) { }
201 getBuckets(C, B, k,
true);
202 for(i = n - 1, d += 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
203 if(0 < (j = SA[i])) {
204 if(n <= j) { d += 1; j -= n; }
205 assert(T[j] <= T[j + 1]);
206 if((c0 = T[j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
207 assert((b - SA) <= i);
209 t = c0; t = (t << 1) | (T[j] > c1);
210 if(D[t] != d) { j += n; D[t] = d; }
211 *--b = (t & 1) ? ~(j + 1) : j;
216 template<
typename sarray_type,
typename index_type>
218 LMSpostproc2(sarray_type SA, index_type n, index_type m) {
219 index_type i, j, d, name;
223 for(i = 0, name = 0; (j = SA[i]) < 0; ++i) {
225 if(n <= j) { name += 1; }
230 for(d = i, ++i;; ++i) {
232 if((j = SA[i]) < 0) {
234 if(n <= j) { name += 1; }
235 SA[d++] = j; SA[i] = 0;
236 if(d == m) {
break; }
242 for(i = m - 1, d = name + 1; 0 <= i; --i) {
243 if(n <= (j = SA[i])) { j -= n; --d; }
244 SA[m + (j >> 1)] = d;
248 for(i = 0; i < m; ++i) {
249 if(n <= (j = SA[i])) { j -= n; SA[i] = j; }
257 template<
typename string_type,
typename sarray_type,
258 typename bucketC_type,
typename bucketB_type,
typename index_type>
260 induceSA(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
261 index_type n, index_type k,
bool recount) {
262 typedef typename std::iterator_traits<string_type>::value_type char_type;
263 typedef typename std::iterator_traits<bucketB_type>::value_type bucket_type;
268 if(recount !=
false) { getCounts(T, C, n, k); }
269 getBuckets(C, B, k,
false);
270 b = SA + B[c1 = T[j = n - 1]];
271 *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
272 for(i = 0; i < n; ++i) {
273 j = SA[i], SA[i] = ~j;
275 if((c0 = T[--j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
276 *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
280 if(recount !=
false) { getCounts(T, C, n, k); }
281 getBuckets(C, B, k,
true);
282 for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
283 if(0 < (j = SA[i])) {
284 if((c0 = T[--j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
285 *--b = ((j == 0) || (T[j - 1] > c1)) ? ~j : j;
291 template<
typename string_type,
typename sarray_type,
292 typename bucketC_type,
typename bucketB_type,
typename index_type>
294 computeBWT(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
295 index_type n, index_type k,
bool recount) {
296 typedef typename std::iterator_traits<string_type>::value_type char_type;
297 typedef typename std::iterator_traits<bucketB_type>::value_type bucket_type;
299 index_type i, j, pidx = -1;
302 if(recount !=
false) { getCounts(T, C, n, k); }
303 getBuckets(C, B, k,
false);
304 b = SA + B[c1 = T[j = n - 1]];
305 *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
306 for(i = 0; i < n; ++i) {
307 if(0 < (j = SA[i])) {
308 SA[i] = ~((index_type)(c0 = T[--j]));
309 if(c0 != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
310 *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
316 if(recount !=
false) { getCounts(T, C, n, k); }
317 getBuckets(C, B, k,
true);
318 for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
319 if(0 < (j = SA[i])) {
320 SA[i] = (c0 = T[--j]);
321 if(c0 != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
322 *--b = ((0 < j) && (T[j - 1] > c1)) ? ~((index_type)T[j - 1]) : j;
332 template<
typename string_type,
typename sarray_type,
333 typename bucketC_type,
typename bucketB_type,
335 std::pair<index_type, index_type>
336 stage1sort(string_type T, sarray_type SA,
337 bucketC_type C, bucketB_type B,
338 index_type n, index_type k,
unsigned flags) {
339 typedef typename std::iterator_traits<string_type>::value_type char_type;
341 index_type i, j, name, m;
343 getCounts(T, C, n, k); getBuckets(C, B, k,
true);
344 for(i = 0; i < n; ++i) { SA[i] = 0; }
345 b = SA + n - 1; i = n - 1; j = n; m = 0; c0 = T[n - 1];
346 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) >= c1));
348 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) <= c1));
350 *b = j; b = SA + --B[c1]; j = i; ++m; assert(B[c1] != (n - 1));
351 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) >= c1));
357 if(flags & (16 | 32)) {
362 try { D =
new index_type[k * 2]; }
catch(...) { D = 0; }
363 if(D == 0) {
return std::make_pair(-2, -2); }
364 for(i = 0, j = 0; i < k; ++i) {
366 if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
369 LMSsort2(T, SA, C, B, D, n, k);
372 bucketB_type D = B - k * 2;
373 for(i = 0, j = 0; i < k; ++i) {
375 if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
378 LMSsort2(T, SA, C, B, D, n, k);
380 name = LMSpostproc2(SA, n, m);
382 LMSsort1(T, SA, C, B, n, k, (flags & (4 | 64)) != 0);
383 name = LMSpostproc1(T, SA, n, m);
391 return std::make_pair(m, name);
393 template<
typename string_type,
typename sarray_type,
394 typename bucketC_type,
typename bucketB_type,
typename index_type>
396 stage3sort(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
397 index_type n, index_type m, index_type k,
398 unsigned flags,
bool isbwt) {
399 typedef typename std::iterator_traits<string_type>::value_type char_type;
400 index_type i, j, p, q, pidx = 0;
402 if((flags & 8) != 0) { getCounts(T, C, n, k); }
405 getBuckets(C, B, k, 1);
406 i = m - 1, j = n, p = SA[m - 1], c1 = T[p];
409 while(q < j) { SA[--j] = 0; }
412 if(--i < 0) {
break; }
414 }
while((c1 = T[p]) == c0);
416 while(0 < j) { SA[--j] = 0; }
418 if(isbwt ==
false) { induceSA(T, SA, C, B, n, k, (flags & (4 | 64)) != 0); }
419 else { pidx = computeBWT(T, SA, C, B, n, k, (flags & (4 | 64)) != 0); }
425 template<
typename string_type,
typename sarray_type,
typename index_type>
427 suffixsort(string_type T, sarray_type SA,
428 index_type fs, index_type n, index_type k,
430 typedef typename std::iterator_traits<string_type>::value_type char_type;
431 sarray_type RA, C, B;
433 index_type i, j, m, name, pidx, newfs;
442 try { Cp =
new index_type[k]; }
catch(...) { Cp = 0; }
443 if(Cp == 0) {
return -2; }
445 B = SA + (n + fs - k);
448 try { Bp =
new index_type[k]; }
catch(...) { Bp = 0; }
449 if(Bp == 0) {
return -2; }
453 C = SA + (n + fs - k);
457 }
else if(k <= 1024) {
458 try { Bp =
new index_type[k]; }
catch(...) { Bp = 0; }
459 if(Bp == 0) {
return -2; }
466 try { Cp =
new index_type[k]; }
catch(...) { Cp = 0; }
467 if(Cp == 0) {
return -2; }
472 if(flags & 1) { flags |= ((k * 2) <= (fs - k)) ? 32 : 16; }
473 else if((flags == 0) && ((k * 2) <= (fs - k * 2))) { flags |= 32; }
476 std::pair<index_type, index_type> r;
478 if(Bp != 0) { r = stage1sort(T, SA, Cp, Bp, n, k, flags); }
479 else { r = stage1sort(T, SA, Cp, B, n, k, flags); }
481 if(Bp != 0) { r = stage1sort(T, SA, C, Bp, n, k, flags); }
482 else { r = stage1sort(T, SA, C, B, n, k, flags); }
484 m = r.first, name = r.second;
487 if(flags & (1 | 4)) {
delete[] Cp; }
488 if(flags & 2) {
delete[] Bp; }
495 if(flags & 4) {
delete[] Cp; }
496 if(flags & 2) {
delete[] Bp; }
497 newfs = (n + fs) - (m * 2);
498 if((flags & (1 | 4 | 64)) == 0) {
499 if((k + name) <= newfs) { newfs -= k; }
502 assert((n >> 1) <= (newfs + m));
504 for(i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) {
505 if(SA[i] != 0) { RA[j--] = SA[i] - 1; }
507 if(suffixsort(RA, SA, newfs, m, name,
false) != 0) {
if(flags & 1) {
delete[] Cp; }
return -2; }
508 i = n - 1; j = m - 1; c0 = T[n - 1];
509 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) >= c1));
511 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) <= c1));
514 do { c1 = c0; }
while((0 <= --i) && ((c0 = T[i]) >= c1));
517 for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; }
519 try { Cp =
new index_type[k]; }
catch(...) { Cp = 0; }
520 if(Cp == 0) {
return -2; }
524 try { Bp =
new index_type[k]; }
catch(...) { Bp = 0; }
525 if(Bp == 0) {
if(flags & 1) {
delete[] Cp; }
return -2; }
531 if(Bp != 0) { pidx = stage3sort(T, SA, Cp, Bp, n, m, k, flags, isbwt); }
532 else { pidx = stage3sort(T, SA, Cp, B, n, m, k, flags, isbwt); }
534 if(Bp != 0) { pidx = stage3sort(T, SA, C, Bp, n, m, k, flags, isbwt); }
535 else { pidx = stage3sort(T, SA, C, B, n, m, k, flags, isbwt); }
537 if(flags & (1 | 4)) {
delete[] Cp; }
538 if(flags & 2) {
delete[] Bp; }
554 template<
typename string_type,
typename sarray_type,
typename index_type>
556 saisxx(string_type T, sarray_type SA, index_type n, index_type k = 256) {
557 typedef typename std::iterator_traits<sarray_type>::value_type savalue_type;
562 if((n < 0) || (k <= 0)) {
return -1; }
563 if(n <= 1) {
if(n == 1) { SA[0] = 0; }
return 0; }
564 return saisxx_private::suffixsort(T, SA, 0, n, k,
false);
576 template<
typename string_type,
typename string_type2,
typename sarray_type,
typename index_type>
578 saisxx_bwt(string_type T, string_type2 U, sarray_type A, index_type n, index_type k = 256) {
579 typedef typename std::iterator_traits<sarray_type>::value_type savalue_type;
580 typedef typename std::iterator_traits<string_type>::value_type char_type;
586 if((n < 0) || (k <= 0)) {
return -1; }
587 if(n <= 1) {
if(n == 1) { U[0] = T[0]; }
return n; }
588 pidx = saisxx_private::suffixsort(T, A, index_type(0), n, k,
true);
590 U[index_type(0)] = T[n - 1];
591 for(i = 0; i < pidx; ++i) { U[i + 1] = (char_type)A[i]; }
592 for(i += 1; i < n; ++i) { U[i] = (char_type)A[i]; }