NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
divsufsortxx.h
Go to the documentation of this file.
1 /*
2  * divsufsortxx.h
3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 // r3
28 
29 #ifndef _DIVSUFSORTXX_H_
30 #define _DIVSUFSORTXX_H_
31 
32 #include <algorithm>
33 #include <stack>
34 
35 
36 namespace divsufsortxx {
37 
38 namespace helper {
39 
40 /* Insertionsort for small size groups. */
41 template<typename ISAIterator_type, typename SAIterator_type>
42 void
43 insertionsort(const ISAIterator_type ISAd,
44  SAIterator_type first, SAIterator_type last) {
45 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
46 typedef typename std::iterator_traits<ISAIterator_type>::value_type value_type;
47  SAIterator_type a, b;
48  pos_type t;
49  value_type v, x;
50 
51  for(a = first + 1; a < last; ++a) {
52  for(v = ISAd[t = *a], b = a; v < (x = ISAd[*(b - 1)]);) {
53  do { *b = *(b - 1); } while((first < --b) && (*(b - 1) < 0));
54  if(b <= first) { break; }
55  }
56  if(v == x) { *(b - 1) = ~*(b - 1); }
57  *b = t;
58  }
59 }
60 
61 
62 /*---------------------------------------------------------------------------*/
63 
64 /* Heapsort */
65 
66 template<typename StringIterator_type, typename SAIterator_type, typename pos_type>
67 void
68 fixdown(const StringIterator_type Td, SAIterator_type SA, pos_type i, pos_type size) {
69 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
70  pos_type j, k, t;
71  value_type c, d, e;
72 
73  for(t = SA[i], c = Td[t]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
74  d = Td[SA[k = j++]];
75  if(d < (e = Td[SA[j]])) { k = j; d = e; }
76  if(d <= c) { break; }
77  }
78  SA[i] = t;
79 }
80 
81 /* Simple top-down heapsort. */
82 template<typename StringIterator_type, typename SAIterator_type>
83 void
84 heapsort(const StringIterator_type Td, SAIterator_type SA,
85  typename std::iterator_traits<SAIterator_type>::value_type size) {
86 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
87  pos_type i, m;
88  pos_type t;
89 
90  m = size;
91  if((size % 2) == 0) {
92  m--;
93  if(Td[SA[m / 2]] < Td[SA[m]]) {
94  std::iter_swap(SA + m, SA + m / 2);
95  }
96  }
97 
98  for(i = m / 2 - 1; 0 <= i; --i) {
99  fixdown(Td, SA, i, m);
100  }
101 
102  if((size % 2) == 0) {
103  std::iter_swap(SA, SA + m);
104  fixdown(Td, SA, pos_type(0), m);
105  }
106 
107  for(i = m - 1; 0 < i; --i) {
108  t = SA[0];
109  SA[0] = SA[i];
110  fixdown(Td, SA, pos_type(0), i);
111  SA[i] = t;
112  }
113 }
114 
115 
116 /*---------------------------------------------------------------------------*/
117 
118 static
119 const int
120 log2table[256]= {
121  -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
122  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
123  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
124  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
125  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
126  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
127  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
128  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
129  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
130  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
131  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
132  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
133  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
134  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
135  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
136  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
137 };
138 
139 template<typename numeric_type>
140 int
141 lg(numeric_type n) {
142  int lgsize = 0;
143  while(255 < n) { lgsize += 8, n >>= 8; }
144  return lgsize + log2table[n];
145 }
146 
147 template<typename Iterator1_type, typename Iterator2_type>
148 void
149 vecswap(Iterator1_type first1, Iterator1_type last1, Iterator2_type first2) {
150  for(; first1 != last1; ++first1, ++first2) {
151  std::iter_swap(first1, first2);
152  }
153 // std::swap_ranges(first1, last1, first2);
154 }
155 
156 
157 /*---------------------------------------------------------------------------*/
158 
159 /* Returns the median of three elements. */
160 template<typename StringIterator_type, typename SAIterator_type>
161 SAIterator_type
162 median3(const StringIterator_type Td,
163  SAIterator_type v1, SAIterator_type v2, SAIterator_type v3) {
164  if(Td[*v1] > Td[*v2]) { std::swap(v1, v2); }
165  if(Td[*v2] > Td[*v3]) {
166  if(Td[*v1] > Td[*v3]) { return v1; }
167  else { return v3; }
168  }
169  return v2;
170 }
171 
172 /* Returns the median of five elements. */
173 template<typename StringIterator_type, typename SAIterator_type>
174 SAIterator_type
175 median5(const StringIterator_type Td,
176  SAIterator_type v1, SAIterator_type v2, SAIterator_type v3,
177  SAIterator_type v4, SAIterator_type v5) {
178  if(Td[*v2] > Td[*v3]) { std::swap(v2, v3); }
179  if(Td[*v4] > Td[*v5]) { std::swap(v4, v5); }
180  if(Td[*v2] > Td[*v4]) { std::swap(v2, v4); std::swap(v3, v5); }
181  if(Td[*v1] > Td[*v3]) { std::swap(v1, v3); }
182  if(Td[*v1] > Td[*v4]) { std::swap(v1, v4); std::swap(v3, v5); }
183  if(Td[*v3] > Td[*v4]) { return v4; }
184  return v3;
185 }
186 
187 /* Returns the pivot element. */
188 template<typename StringIterator_type, typename SAIterator_type>
189 SAIterator_type
190 pivot(const StringIterator_type Td,
191  SAIterator_type first, SAIterator_type last) {
192 typedef typename std::iterator_traits<SAIterator_type>::difference_type difference_type;
193  difference_type t = last - first;
194  SAIterator_type middle = first + t / 2;
195 
196  if(t <= 512) {
197  if(t <= 32) {
198  return median3(Td, first, middle, last - 1);
199  } else {
200  t >>= 2;
201  return median5(Td,
202  first, first + t, middle,
203  last - 1 - t, last - 1);
204  }
205  }
206  t >>= 3;
207  return median3(Td,
208  median3(Td, first, first + t, first + (t << 1)),
209  median3(Td, middle - t, middle, middle + t),
210  median3(Td, last - 1 - (t << 1), last - 1 - t, last - 1));
211 }
212 
213 /* Two-stage double-index controlled ternary partition */
214 template<typename StringIterator_type, typename SAIterator_type>
215 bool
216 partition(const StringIterator_type Td,
217  SAIterator_type first1, SAIterator_type first2, SAIterator_type last,
218  SAIterator_type &mfirst, SAIterator_type &mlast,
219  const typename std::iterator_traits<StringIterator_type>::value_type &v) {
220  SAIterator_type a, b, c, d;
221  typename std::iterator_traits<StringIterator_type>::value_type x = 0;
222 
223  for(b = first2; (b < last) && ((x = Td[*b]) == v); ++b) { }
224  if(((a = b) < last) && (x < v)) {
225  for(; (++b < last) && ((x = Td[*b]) <= v);) {
226  if(x == v) { std::iter_swap(b, a++); }
227  }
228  }
229  for(c = last; (b < --c) && ((x = Td[*c]) == v);) { }
230  if((b < (d = c)) && (x > v)) {
231  for(; (b < --c) && ((x = Td[*c]) >= v);) {
232  if(x == v) { std::iter_swap(c, d--); }
233  }
234  }
235  for(; b < c;) {
236  std::iter_swap(b, c);
237  for(; (++b < c) && ((x = Td[*b]) <= v);) {
238  if(x == v) { std::iter_swap(b, a++); }
239  }
240  for(; (b < --c) && ((x = Td[*c]) >= v);) {
241  if(x == v) { std::iter_swap(c, d--); }
242  }
243  }
244 
245  if(a <= d) {
246  vecswap(b - std::min(a - first1, b - a), b, first1);
247  vecswap(last - std::min(d + 1 - b, last - d - 1), last, b);
248  mfirst = first1 + (b - a), mlast = last - (d + 1 - b);
249  return true;
250  }
251  mfirst = first1, mlast = last;
252  return false;
253 }
254 
255 
256 /*---------------------------------------------------------------------------*/
257 
258 template<typename a_type, typename b_type, typename c_type>
259 struct stackinfo3 {
260  a_type m_a;
261  b_type m_b;
262  c_type m_c;
263  stackinfo3(a_type a, b_type b, c_type c) : m_a(a), m_b(b), m_c(c) { }
264 };
265 
266 template<typename a_type, typename b_type, typename c_type, typename d_type>
267 struct stackinfo4 {
268  a_type m_a;
269  b_type m_b;
270  c_type m_c;
271  d_type m_d;
272  stackinfo4(a_type a, b_type b, c_type c, d_type d) : m_a(a), m_b(b), m_c(c), m_d(d) { }
273 };
274 
275 } /* namespace helper */
276 
277 #define STACK_POP3(_a, _b, _c)\
278  {\
279  if(stack.empty()) { return; }\
280  stackinfo_type tempinfo = stack.top();\
281  (_a) = tempinfo.m_a, (_b) = tempinfo.m_b, (_c) = tempinfo.m_c;\
282  stack.pop();\
283  }
284 #define STACK_POP4(_a, _b, _c, _d)\
285  {\
286  if(stack.empty()) { return; }\
287  stackinfo_type tempinfo = stack.top();\
288  (_a) = tempinfo.m_a, (_b) = tempinfo.m_b, (_c) = tempinfo.m_c, (_d) = tempinfo.m_d;\
289  stack.pop();\
290  }
291 #define STACK_PUSH3(_a, _b, _c)\
292  stack.push(stackinfo_type((_a), (_b), (_c)))
293 #define STACK_PUSH4(_a, _b, _c, _d)\
294  stack.push(stackinfo_type((_a), (_b), (_c), (_d)))
295 
296 
297 namespace substring {
298 
299 template<typename StringIterator_type, typename SAIterator_type>
300 int
301 compare(StringIterator_type T,
302  const SAIterator_type p1, const SAIterator_type p2,
303  typename std::iterator_traits<SAIterator_type>::value_type depth) {
304  StringIterator_type U1 = T + depth + *p1,
305  U2 = T + depth + *p2,
306  U1n = T + *(p1 + 1) + 2,
307  U2n = T + *(p2 + 1) + 2;
308  for(; (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); ++U1, ++U2) { }
309 
310  return U1 < U1n ?
311  (U2 < U2n ? (*U2 < *U1) * 2 - 1 : 1) :
312 // (U2 < U2n ? *U1 - *U2 : 1) :
313  (U2 < U2n ? -1 : 0);
314 }
315 
316 template<typename StringIterator_type, typename SAIterator_type>
317 int
318 compare_last(StringIterator_type T,
319  const SAIterator_type p1, const SAIterator_type p2,
320  typename std::iterator_traits<SAIterator_type>::value_type depth,
321  typename std::iterator_traits<SAIterator_type>::value_type size) {
322  StringIterator_type U1 = T + depth + *p1,
323  U2 = T + depth + *p2,
324  U1n = T + size,
325  U2n = T + *(p2 + 1) + 2;
326  for(; (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); ++U1, ++U2) { }
327 
328  return U1 < U1n ?
329  (U2 < U2n ? (*U2 < *U1) * 2 - 1 : 1) :
330 // (U2 < U2n ? *U1 - *U2 : 1) :
331  (U2 < U2n ? -1 : 0);
332 }
333 
334 template<typename StringIterator_type, typename SAIterator_type>
335 void
336 insertionsort(StringIterator_type T, const SAIterator_type PA,
337  SAIterator_type first, SAIterator_type last,
338  typename std::iterator_traits<SAIterator_type>::value_type depth) {
339  SAIterator_type a, b;
340  typename std::iterator_traits<SAIterator_type>::value_type t;
341  int r;
342 
343  for(a = last - 1; first < a; --a) {
344  for(t = *(a - 1), b = a; 0 < (r = compare(T, PA + t, PA + *b, depth));) {
345  do { *(b - 1) = *b; } while((++b < last) && (*b < 0));
346  if(last <= b) { break; }
347  }
348  if(r == 0) { *b = ~*b; }
349  *(b - 1) = t;
350  }
351 }
352 
353 template<typename StringIterator_type, typename PAIterator_type>
355 private:
356  StringIterator_type m_Td;
357  const PAIterator_type m_PA;
358 public:
359  typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
360  typedef value_type * pointer;
362  typedef ptrdiff_t difference_type;
363  typedef std::random_access_iterator_tag iterator_category;
364 
365  typedef size_t size_type;
366 
367  substring_wrapper(StringIterator_type Td, const PAIterator_type PA) : m_Td(Td), m_PA(PA) {
368  }
369 
370  value_type
372  return m_Td[m_PA[i]];
373  }
374 
377  return *this;
378  }
379 };
380 
381 template<typename SAIterator_type>
382 SAIterator_type
383 partition(const SAIterator_type PA,
384  SAIterator_type first, SAIterator_type last,
385  typename std::iterator_traits<SAIterator_type>::value_type depth) {
386  SAIterator_type a, b;
387  for(a = first, b = last - 1;; ++a, --b) {
388  for(; (a <= b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1)); ++a) { *a = ~*a; }
389  for(; (a < b) && ((PA[*b] + depth) < (PA[*b + 1] + 1)); --b) { }
390  if(b <= a) { break; }
391  std::iter_swap(b, a);
392  *a = ~*a;
393  }
394  if(first < a) { *first = ~*first; }
395  return a;
396 }
397 
398 template<typename stack_type, typename StringIterator_type, typename SAIterator_type>
399 void
400 mintrosort(stack_type &stack, const StringIterator_type T, const SAIterator_type PA,
401  SAIterator_type first, SAIterator_type last,
402  typename std::iterator_traits<SAIterator_type>::value_type depth) {
403 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
404 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
406 typedef typename stack_type::value_type stackinfo_type;
407 
408  SAIterator_type a, b, c;
409  value_type v, x;
410  int limit;
411 
412  for(limit = helper::lg(last - first);;) {
413 
414  if((last - first) <= 8) {
415  if(1 < (last - first)) { insertionsort(T, PA, first, last, depth); }
416  STACK_POP4(first, last, depth, limit);
417  continue;
418  }
419 
420  const StringIterator_type Td = T + depth;
421  if(limit-- == 0) { helper::heapsort(wrapper_type(Td, PA), first, value_type( last - first )); }
422  if(limit < 0) {
423  for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
424  if((x = Td[PA[*a]]) != v) {
425  if(1 < (a - first)) { break; }
426  v = x;
427  first = a;
428  }
429  }
430  if(Td[PA[*first] - 1] < v) {
431  first = partition(PA, first, a, depth);
432  }
433  if((a - first) <= (last - a)) {
434  if(1 < (a - first)) {
435  STACK_PUSH4(a, last, depth, -1);
436  last = a, depth += 1, limit = helper::lg(a - first);
437  } else {
438  first = a, limit = -1;
439  }
440  } else {
441  if(1 < (last - a)) {
442  STACK_PUSH4(first, a, depth + 1, helper::lg(a - first));
443  first = a, limit = -1;
444  } else {
445  last = a, depth += 1, limit = helper::lg(a - first);
446  }
447  }
448  continue;
449  }
450 
451  /* partition */
452  a = helper::pivot(wrapper_type(Td, PA), first, last);
453  std::iter_swap(first, a);
454  if(helper::partition(wrapper_type(Td, PA), first, first + 1, last, a, c, Td[PA[*first]]) != false) {
455  b = (Td[PA[*a]] <= Td[PA[*a] - 1]) ? a : partition(PA, a, c, depth);
456 
457  if((a - first) <= (last - c)) {
458  if((last - c) <= (c - b)) {
459  STACK_PUSH4(b, c, depth + 1, helper::lg(c - b));
460  STACK_PUSH4(c, last, depth, limit);
461  last = a;
462  } else if((a - first) <= (c - b)) {
463  STACK_PUSH4(c, last, depth, limit);
464  STACK_PUSH4(b, c, depth + 1, helper::lg(c - b));
465  last = a;
466  } else {
467  STACK_PUSH4(c, last, depth, limit);
468  STACK_PUSH4(first, a, depth, limit);
469  first = b, last = c, depth += 1, limit = helper::lg(c - b);
470  }
471  } else {
472  if((a - first) <= (c - b)) {
473  STACK_PUSH4(b, c, depth + 1, helper::lg(c - b));
474  STACK_PUSH4(first, a, depth, limit);
475  first = c;
476  } else if((last - c) <= (c - b)) {
477  STACK_PUSH4(first, a, depth, limit);
478  STACK_PUSH4(b, c, depth + 1, helper::lg(c - b));
479  first = c;
480  } else {
481  STACK_PUSH4(first, a, depth, limit);
482  STACK_PUSH4(c, last, depth, limit);
483  first = b, last = c, depth += 1, limit = helper::lg(c - b);
484  }
485  }
486  } else {
487  limit += 1;
488  if(Td[PA[*first] - 1] < Td[PA[*first]]) {
489  first = partition(PA, first, last, depth);
490  limit = helper::lg(last - first);
491  }
492  depth += 1;
493  }
494  }
495 }
496 
497 /* Merge-forward with internal buffer. */
498 template<typename StringIterator_type, typename SAIterator_type, typename BufIterator_type>
499 void
500 merge_forward(const StringIterator_type T,
501  const SAIterator_type PA, BufIterator_type buf,
502  SAIterator_type first, SAIterator_type middle, SAIterator_type last,
503  typename std::iterator_traits<SAIterator_type>::value_type depth) {
504  SAIterator_type i, k;
505  BufIterator_type j, bufend;
506  typename std::iterator_traits<SAIterator_type>::value_type t;
507  int r;
508 
509  bufend = buf + (middle - first);
510  helper::vecswap(first, middle, buf);
511 
512  for(t = *first, i = first, j = buf, k = middle;;) {
513  r = compare(T, PA + *j, PA + *k, depth);
514  if(r < 0) {
515  do {
516  *i++ = *j; *j++ = *i;
517  if(bufend <= j) { *(bufend - 1) = t; return; }
518  } while(*j < 0);
519  } else if(r > 0) {
520  do {
521  *i++ = *k; *k++ = *i;
522  if(last <= k) {
523  do { *i++ = *j; *j++ = *i; } while(j < bufend);
524  *(bufend - 1) = t;
525  return;
526  }
527  } while(*k < 0);
528  } else {
529  *k = ~*k;
530  do {
531  *i++ = *j; *j++ = *i;
532  if(bufend <= j) { *(bufend - 1) = t; return; }
533  } while(*j < 0);
534 
535  do {
536  *i++ = *k; *k++ = *i;
537  if(last <= k) {
538  do { *i++ = *j; *j++ = *i; } while(j < bufend);
539  *(bufend - 1) = t;
540  return;
541  }
542  } while(*k < 0);
543  }
544  }
545 }
546 
547 /* Merge-backward with internal buffer. */
548 template<typename StringIterator_type, typename SAIterator_type, typename BufIterator_type>
549 void
550 merge_backward(const StringIterator_type T,
551  const SAIterator_type PA, BufIterator_type buf,
552  SAIterator_type first, SAIterator_type middle, SAIterator_type last,
553  typename std::iterator_traits<SAIterator_type>::value_type depth) {
554  SAIterator_type p1, p2;
555  SAIterator_type i, k;
556  BufIterator_type j, bufend;
557  typename std::iterator_traits<SAIterator_type>::value_type t;
558  int r, x;
559 
560  bufend = buf + (last - middle);
561  helper::vecswap(middle, last, buf);
562 
563  x = 0;
564  if(*(bufend - 1) < 0) { x |= 1; p1 = PA + ~*(bufend - 1); }
565  else { p1 = PA + *(bufend - 1); }
566  if(*(middle - 1) < 0) { x |= 2; p2 = PA + ~*(middle - 1); }
567  else { p2 = PA + *(middle - 1); }
568  for(t = *(last - 1), i = last - 1, j = bufend - 1, k = middle - 1;;) {
569 
570  r = compare(T, p1, p2, depth);
571  if(r > 0) {
572  if(x & 1) { do { *i-- = *j; *j-- = *i; } while(*j < 0); x ^= 1; }
573  *i-- = *j; *j = *i;
574  if(j <= buf) { *buf = t; return; }
575 
576  if(*--j < 0) { x |= 1; p1 = PA + ~*j; }
577  else { p1 = PA + *j; }
578  } else if(r < 0) {
579  if(x & 2) { do { *i-- = *k; *k-- = *i; } while(*k < 0); x ^= 2; }
580  *i-- = *k; *k = *i;
581  if(k <= first) {
582  while(buf < j) { *i-- = *j; *j-- = *i; }
583  *i = *j, *buf = t;
584  return;
585  }
586 
587  if(*--k < 0) { x |= 2; p2 = PA + ~*k; }
588  else { p2 = PA + *k; }
589  } else {
590  if(x & 1) { do { *i-- = *j; *j-- = *i; } while(*j < 0); x ^= 1; }
591  *i-- = ~*j; *j = *i;
592  if(j <= buf) { *buf = t; return; }
593  --j;
594 
595  if(x & 2) { do { *i-- = *k; *k-- = *i; } while(*k < 0); x ^= 2; }
596  *i-- = *k; *k = *i;
597  if(k <= first) {
598  while(buf < j) { *i-- = *j; *j-- = *i; }
599  *i = *j, *buf = t;
600  return;
601  }
602  --k;
603 
604  if(*j < 0) { x |= 1; p1 = PA + ~*j; }
605  else { p1 = PA + *j; }
606  if(*k < 0) { x |= 2; p2 = PA + ~*k; }
607  else { p2 = PA + *k; }
608  }
609  }
610 }
611 
612 /* Faster merge (based on divide and conquer technique). */
613 template<typename stack_type, typename StringIterator_type, typename SAIterator_type, typename BufIterator_type>
614 void
615 merge(stack_type &stack, const StringIterator_type T, const SAIterator_type PA,
616  SAIterator_type first, SAIterator_type middle, SAIterator_type last,
617  BufIterator_type buf, typename std::iterator_traits<SAIterator_type>::value_type bufsize,
618  typename std::iterator_traits<SAIterator_type>::value_type depth) {
619 typedef typename std::iterator_traits<SAIterator_type>::difference_type difference_type;
620 typedef typename stack_type::value_type stackinfo_type;
621 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
622 #define MERGE_CHECK(a)\
623  {\
624  if((0 <= *(a)) &&\
625  (compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0)) {\
626  *(a) = ~*(a);\
627  }\
628  }
629 
630  SAIterator_type i, j;
631  difference_type m, len, half;
632  int check, next;
633 
634  for(check = 0;;) {
635 
636  if((last - middle) <= bufsize) {
637  if((first < middle) && (middle < last)) {
638  merge_backward(T, PA, buf, first, middle, last, depth);
639  }
640  if(check & 1) { MERGE_CHECK(first); }
641  if(check & 2) { MERGE_CHECK(last); }
642  STACK_POP4(first, middle, last, check);
643  continue;
644  }
645 
646  if((middle - first) <= bufsize) {
647  if(first < middle) {
648  merge_forward(T, PA, buf, first, middle, last, depth);
649  }
650  if(check & 1) { MERGE_CHECK(first); }
651  if(check & 2) { MERGE_CHECK(last); }
652  STACK_POP4(first, middle, last, check);
653  continue;
654  }
655 
656  for(m = 0, len = std::min(middle - first, last - middle), half = len >> 1;
657  0 < len;
658  len = half, half >>= 1) {
659  if(compare(T, PA + GETIDX(*(middle + m + half)),
660  PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
661  m += half + 1;
662  half -= ((len & 1) == 0);
663  }
664  }
665 
666  if(0 < m) {
667  helper::vecswap(middle - m, middle, middle);
668  i = j = middle, next = 0;
669  if((middle + m) < last) {
670  if(*(middle + m) < 0) {
671  for(; *(i - 1) < 0; --i) { }
672  *(middle + m) = ~*(middle + m);
673  }
674  for(j = middle; *j < 0; ++j) { }
675  next = 1;
676  }
677  if((i - first) <= (last - j)) {
678  STACK_PUSH4(j, middle + m, last, (check & 2) | (next & 1));
679  middle -= m, last = i, check = (check & 1);
680  } else {
681  if((i == middle) && (middle == j)) { next <<= 1; }
682  STACK_PUSH4(first, middle - m, i, (check & 1) | (next & 2));
683  first = j, middle += m, check = (check & 2) | (next & 1);
684  }
685  } else {
686  if(check & 1) { MERGE_CHECK(first); }
687  MERGE_CHECK(middle);
688  if(check & 2) { MERGE_CHECK(last); }
689  STACK_POP4(first, middle, last, check);
690  }
691  }
692 #undef GETIDX
693 #undef MERGE_CHECK
694 }
695 
696 template<typename StringIterator_type, typename SAIterator_type, typename BufIterator_type>
697 void
698 sort(const StringIterator_type T, const SAIterator_type PA,
699  SAIterator_type first, SAIterator_type last,
700  BufIterator_type buf, typename std::iterator_traits<SAIterator_type>::value_type bufsize,
701  typename std::iterator_traits<SAIterator_type>::value_type depth,
702  typename std::iterator_traits<SAIterator_type>::value_type size,
703  bool lastsuffix, int blocksize = 1024) {
704 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
705  std::stack<helper::stackinfo4<SAIterator_type, SAIterator_type, pos_type, int> > stack1;
706  std::stack<helper::stackinfo4<SAIterator_type, SAIterator_type, SAIterator_type, int> > stack2;
707 
708  SAIterator_type a, b;
709  SAIterator_type curbuf;
710  pos_type i, j, k, curbufsize;
711 
712  if(lastsuffix != false) { ++first; }
713  for(a = first, i = 0; blocksize < (last - a); a += blocksize, ++i) {
714  mintrosort(stack1, T, PA, a, a + blocksize, depth);
715  curbuf = a + blocksize;
716  curbufsize = pos_type( last - (a + blocksize) );
717  if(bufsize <= curbufsize) {
718  for(b = a, k = blocksize, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
719  merge(stack2, T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
720  }
721  } else {
722  for(b = a, k = blocksize, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
723  merge(stack2, T, PA, b - k, b, b + k, buf, bufsize, depth);
724  }
725  }
726  }
727  mintrosort(stack1, T, PA, a, last, depth);
728  for(k = blocksize; i != 0; k <<= 1, i >>= 1) {
729  if(i & 1) {
730  merge(stack2, T, PA, a - k, a, last, buf, bufsize, depth);
731  a -= k;
732  }
733  }
734 
735  if(lastsuffix != false) {
736  /* Insert last type B* suffix. */
737  for(a = first, i = *(first - 1);
738  (a < last) && ((*a < 0) || (0 < compare_last(T, PA + i, PA + *a, depth, size)));
739  ++a) {
740  *(a - 1) = *a;
741  }
742  *(a - 1) = i;
743  }
744 }
745 
746 
747 } /* namespace substring */
748 
749 
750 
751 namespace doubling {
752 
753 template<typename ISAIterator_type, typename SAIterator_type>
754 void
755 updategroup(ISAIterator_type ISA, const SAIterator_type SA,
756  SAIterator_type first, SAIterator_type last) {
757 typedef typename std::iterator_traits<ISAIterator_type>::value_type value_type;
758 typedef typename std::iterator_traits<SAIterator_type>::value_type sa_type;
759  SAIterator_type a, b;
760  value_type t;
761 
762  for(a = first; a < last; ++a) {
763  if(0 <= *a) {
764  b = a;
765  do { ISA[*a] = sa_type( a - SA ); } while((++a < last) && (0 <= *a));
766  *b = sa_type( b - a );
767  if(last <= a) { break; }
768  }
769  b = a;
770  do { *a = ~*a; } while(*++a < 0);
771  t = value_type( a - SA );
772  do { ISA[*b] = t; } while(++b <= a);
773  }
774 }
775 
776 template<typename stack_type, typename ISAIterator_type, typename SAIterator_type>
777 void
778 introsort(stack_type &stack, ISAIterator_type ISA, const ISAIterator_type ISAd,
779  const SAIterator_type SA, SAIterator_type first, SAIterator_type last) {
780 typedef typename std::iterator_traits<ISAIterator_type>::value_type value_type;
781 typedef typename std::iterator_traits<SAIterator_type>::value_type sa_type;
782 typedef typename stack_type::value_type stackinfo_type;
783 
784  SAIterator_type a, b, c;
785  value_type v, x;
786 
787  for(int limit = helper::lg(last - first);;) {
788 
789  if((last - first) <= 8) {
790  if(1 < (last - first)) {
791  helper::insertionsort(ISAd, first, last);
792  updategroup(ISA, SA, first, last);
793  } else if((last - first) == 1) { *first = -1; }
794  STACK_POP3(first, last, limit);
795  continue;
796  }
797 
798  if(limit-- == 0) {
799  helper::heapsort(ISAd, first, value_type( last - first ));
800  for(a = last - 1, v = ISAd[*a]; first < a;) {
801  if((x = ISAd[*--a]) == v) { *a = ~*a; }
802  else { v = x; }
803  }
804  updategroup(ISA, SA, first, last);
805  STACK_POP3(first, last, limit);
806  continue;
807  }
808 
809  a = helper::pivot(ISAd, first, last);
810  std::iter_swap(first, a);
811  if(helper::partition(ISAd, first, first + 1, last, a, b, ISAd[*first]) != false) {
812 
813  /* update ranks */
814  for(c = first, v = value_type( a - SA - 1 ); c < a; ++c) { ISA[*c] = v; }
815  if(b < last) { for(c = a, v = value_type( b - SA - 1 ); c < b; ++c) { ISA[*c] = v; } }
816  if((b - a) == 1) { *a = - 1; }
817 
818  if((a - first) <= (last - b)) {
819  if(first < a) {
820  STACK_PUSH3(b, last, limit);
821  last = a;
822  } else {
823  first = b;
824  }
825  } else {
826  if(b < last) {
827  STACK_PUSH3(first, a, limit);
828  first = b;
829  } else {
830  last = a;
831  }
832  }
833  } else {
834  STACK_POP3(first, last, limit);
835  }
836  }
837 }
838 
839 template<typename ISAIterator_type, typename SAIterator_type>
840 void
841 sort(ISAIterator_type ISA, SAIterator_type first, SAIterator_type last) {
842 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
843  std::stack<helper::stackinfo3<SAIterator_type, SAIterator_type, int> > stack;
844  SAIterator_type a, b;
845  pos_type t, skip, depth, size;
846 
847  for(size = pos_type( last - first ), depth = 1; -size < *first; depth *= 2) {
848  a = first, skip = 0;
849  do {
850  if((t = *a) < 0) { a -= t; skip += t; }
851  else {
852  if(skip != 0) { *(a + skip) = skip; skip = 0; }
853  b = first + ISA[t] + 1;
854  introsort(stack, ISA, ISA + depth, first, a, b);
855  a = b;
856  }
857  } while(a < last);
858  if(skip != 0) { *(a + skip) = skip; }
859  }
860 }
861 
862 } /* namespace doubling */
863 
864 
865 
866 namespace tandemrepeat {
867 
868 template<typename stack_type, typename ISAIterator_type, typename SAIterator_type, typename pos_type>
869 void
870 introsort(stack_type &stack, ISAIterator_type ISA, ISAIterator_type ISAd,
871  const SAIterator_type SA, SAIterator_type first, SAIterator_type last,
872  pos_type &budget, int &chance, pos_type size) {
873 typedef typename std::iterator_traits<ISAIterator_type>::value_type value_type;
874 typedef typename stack_type::value_type stackinfo_type;
875 #define UPDATE_BUDGET(n)\
876  {\
877  budget -= (n);\
878  if(budget <= 0) {\
879  budget += size;\
880  if(--chance == 0) { break; }\
881  }\
882  }
883 
884  SAIterator_type a, b, c, d, e;
885  pos_type s, t;
886  value_type v, x;
887  int limit, next;
888 
889  for(limit = helper::lg(last - first);;) {
890 
891  if(limit < 0) {
892  if(limit == -1) {
893  /* tandem repeat partition */
894  helper::partition(ISAd - 1, first, first, last, a, b, value_type( last - SA - 1 ));
895 
896  /* update ranks */
897  if(a < last) {
898  for(c = first, v = value_type( a - SA - 1 ); c < a; ++c) { ISA[*c] = v; }
899  }
900  if(b < last) {
901  for(c = a, v = value_type( b - SA - 1 ); c < b; ++c) { ISA[*c] = v; }
902  }
903 
904  /* push */
905  STACK_PUSH4(ISAd, a, b, 0);
906  STACK_PUSH4(ISAd - 1, first, last, -2);
907  if((a - first) <= (last - b)) {
908  if(first < a) {
909  STACK_PUSH4(ISAd, b, last, helper::lg(last - b)); last = a;
910  } else { first = b; }
911  } else {
912  if(b < last) {
913  STACK_PUSH4(ISAd, first, a, helper::lg(a - first)); first = b;
914  } else { last = a; }
915  }
916  limit = helper::lg(last - first);
917  } else if(limit == -2) {
918  /* tandem repeat copy */
919  stackinfo_type temp = stack.top(); stack.pop();
920  a = temp.m_b, b = temp.m_c;
921  t = value_type( ISAd - ISA );
922  v = value_type( b - SA - 1 );
923  for(c = first, d = a; c < d; ++c) {
924  if((0 <= (s = *c - t)) && (ISA[s] == v)) {
925  ISA[s] = value_type( d - SA );
926  *d++ = s;
927  }
928  }
929  for(c = last - 1, e = d, d = b; e < d; --c) {
930  if((0 <= (s = *c - t)) && (ISA[s] == v)) {
931  *--d = s;
932  ISA[s] = value_type( d - SA );
933  }
934  }
935  STACK_POP4(ISAd, first, last, limit);
936  } else {
937  /* sorted partition */
938  if(0 <= *first) {
939  a = first;
940  do { ISA[*a] = value_type( a - SA ); } while((++a < last) && (0 <= *a));
941  first = a;
942  }
943  if(first < last) {
944  a = first; do { *a = ~*a; } while(*++a < 0); ++a;
945  if(a < last) {
946  for(c = first, v = value_type( a - SA - 1 ); c < a; ++c) { ISA[*c] = v; }
947  }
948 
949  /* push */
950  next = (ISA[*first] == ISAd[*first]) ? -1 : helper::lg(a - first);
951  UPDATE_BUDGET(value_type( last - first ));
952  if((a - first) <= (last - a)) {
953  if(first < a) {
954  STACK_PUSH4(ISAd, a, last, -3);
955  ISAd += 1, last = a, limit = next;
956  } else {
957  first = a, limit = -3;
958  }
959  } else {
960  if(a < last) {
961  STACK_PUSH4(ISAd + 1, first, a, next);
962  first = a, limit = -3;
963  } else {
964  ISAd += 1, last = a, limit = next;
965  }
966  }
967  } else {
968  STACK_POP4(ISAd, first, last, limit);
969  }
970  }
971  continue;
972  }
973 
974  if((last - first) <= 8) {
975  if(1 < (last - first)) {
976  helper::insertionsort(ISAd, first, last);
977  limit = -3;
978  } else {
979  STACK_POP4(ISAd, first, last, limit);
980  }
981  continue;
982  }
983 
984  if(limit-- == 0) {
985  helper::heapsort(ISAd, first, value_type( last - first ));
986  for(a = last - 1, v = ISAd[*a]; first < a;) {
987  if((x = ISAd[*--a]) == v) { *a = ~*a; }
988  else { v = x; }
989  }
990  limit = -3;
991  continue;
992  }
993 
994  a = helper::pivot(ISAd, first, last);
995  std::iter_swap(first, a);
996  if(helper::partition(ISAd, first, first + 1, last, a, b, ISAd[*first]) != false) {
997  next = (ISA[*a] == ISAd[*a]) ? -1 : helper::lg(b - a);
998 
999  /* update ranks */
1000  for(c = first, v = value_type( a - SA - 1 ); c < a; ++c) { ISA[*c] = v; }
1001  if(b < last) { for(c = a, v = value_type( b - SA - 1 ); c < b; ++c) { ISA[*c] = v; } }
1002 
1003  /* push */
1004  UPDATE_BUDGET(value_type( last - first ));
1005  if((a - first) <= (last - b)) {
1006  if((last - b) <= (b - a)) {
1007  STACK_PUSH4(ISAd + 1, a, b, next);
1008  STACK_PUSH4(ISAd, b, last, limit);
1009  last = a;
1010  } else if((a - first) <= (b - a)) {
1011  STACK_PUSH4(ISAd, b, last, limit);
1012  STACK_PUSH4(ISAd + 1, a, b, next);
1013  last = a;
1014  } else {
1015  STACK_PUSH4(ISAd, b, last, limit);
1016  STACK_PUSH4(ISAd, first, a, limit);
1017  ISAd += 1, first = a, last = b, limit = next;
1018  }
1019  } else {
1020  if((a - first) <= (b - a)) {
1021  STACK_PUSH4(ISAd + 1, a, b, next);
1022  STACK_PUSH4(ISAd, first, a, limit);
1023  first = b;
1024  } else if((last - b) <= (b - a)) {
1025  STACK_PUSH4(ISAd, first, a, limit);
1026  STACK_PUSH4(ISAd + 1, a, b, next);
1027  first = b;
1028  } else {
1029  STACK_PUSH4(ISAd, first, a, limit);
1030  STACK_PUSH4(ISAd, b, last, limit);
1031  ISAd += 1, first = a, last = b, limit = next;
1032  }
1033  }
1034  } else {
1035  limit = (ISA[*first] == ISAd[*first]) ? -1 : (limit + 1), ISAd += 1;
1036  UPDATE_BUDGET(value_type( last - first ));
1037  }
1038  }
1039 
1040  for(; stack.empty() == false; stack.pop()) {
1041  stackinfo_type temp = stack.top();
1042  if(temp.m_d == -3) {
1043  first = temp.m_b, last = temp.m_c;
1044  for(a = first; a < last; ++a) {
1045  if(0 <= *a) {
1046  b = a;
1047  do { ISA[*a] = value_type( a - SA ); } while((++a < last) && (0 <= *a));
1048  if(last <= a) { break; }
1049  }
1050  b = a;
1051  do { *a = ~*a; } while(*++a < 0);
1052  v = value_type( a - SA );
1053  do { ISA[*b] = v; } while(++b <= a);
1054  }
1055  }
1056  }
1057 #undef UPDATE_BUDGET
1058 }
1059 
1060 template<typename ISAIterator_type, typename SAIterator_type>
1061 bool
1062 sort(ISAIterator_type ISA, SAIterator_type first, SAIterator_type last) {
1063 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
1064  std::stack<helper::stackinfo4<ISAIterator_type, SAIterator_type, SAIterator_type, int> > stack;
1065  SAIterator_type a, b;
1066  pos_type t, skip, size, budget;
1067  int chance;
1068 
1069  size = pos_type( last - first );
1070  if(-size < *first) {
1071  a = first, skip = 0, budget = size;
1072 // chance = helper::lg(size);
1073  chance = helper::lg(size) * 2 / 3 + 1;
1074 // chance = helper::lg(size) / 2 + 1;
1075  do {
1076  if((t = *a) < 0) { a -= t; skip += t; }
1077  else {
1078  skip = 0;
1079  b = first + ISA[t] + 1;
1080  introsort(stack, ISA, ISA + 1, first, a, b, budget, chance, size);
1081  if(chance == 0) {
1082  /* Switch to Larsson-Sadakane sorting algorithm. */
1083  if(first < a) { *first = -pos_type(a - first); }
1084  return false;
1085  }
1086  a = b;
1087  }
1088  } while(a < last);
1089  }
1090 
1091  return true;
1092 }
1093 
1094 
1095 } /* namespace tandemrepeat */
1096 
1097 
1098 #undef STACK_POP3
1099 #undef STACK_POP4
1100 #undef STACK_PUSH3
1101 #undef STACK_PUSH4
1102 
1103 
1104 static const int EXTRA_SPACE = 0;
1105 
1106 namespace core {
1107 #define BUCKET_A(c0) bucket_A[(c0)]
1108 #define BUCKET_B(c0, c1) (bucket_B[((alphabetsize_type)(c1)) * alphabetsize + (alphabetsize_type)(c0)])
1109 #define BUCKET_BSTAR(c0, c1) (bucket_B[((alphabetsize_type)(c0)) * alphabetsize + (alphabetsize_type)(c1)])
1110 
1111 static const int MERGE_BUFSIZE = 256;
1112 
1113 /* Sorts suffixes of type B*. */
1114 template<typename StringIterator_type, typename SAIterator_type, typename pos_type, typename alphabetsize_type>
1115 pos_type
1116 sort_typeBstar(const StringIterator_type T, SAIterator_type SA,
1117  pos_type *bucket_A, pos_type *bucket_B,
1118  pos_type n, pos_type SAsize, alphabetsize_type alphabetsize) {
1119 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
1120  pos_type i, j, k, t, m, bufsize;
1121  alphabetsize_type c0, c1;
1122 
1123  /* Initialize bucket arrays. */
1124  for(i = 0; i < alphabetsize; ++i) { bucket_A[i] = 0; }
1125  for(i = 0; i < alphabetsize * alphabetsize; ++i) { bucket_B[i] = 0; }
1126 
1127  /* Count the number of occurrences of the first one or two characters of each
1128  type A, B and B* suffix. Moreover, store the beginning position of all
1129  type B* suffixes into the array SA. */
1130  for(i = n - 1, m = SAsize; 0 <= i;) {
1131  /* type A suffix. */
1132  do { ++BUCKET_A(T[i]); } while((0 <= --i) && (T[i] >= T[i + 1]));
1133  if(0 <= i) {
1134  /* type B* suffix. */
1135  ++BUCKET_BSTAR(T[i], T[i + 1]);
1136  SA[--m] = i;
1137  /* type B suffix. */
1138  for(--i; (0 <= i) && (T[i] <= T[i + 1]); --i) {
1139  ++BUCKET_B(T[i], T[i + 1]);
1140  }
1141  }
1142  }
1143  m = SAsize - m;
1144 
1145  /* Calculate the index of start/end point of each bucket. */
1146  for(c0 = 0, i = -1, j = 0; c0 < alphabetsize; ++c0) {
1147  t = i + BUCKET_A(c0);
1148  BUCKET_A(c0) = i + j; /* start point */
1149  i = t + BUCKET_B(c0, c0);
1150  for(c1 = c0 + 1; c1 < alphabetsize; ++c1) {
1151  j += BUCKET_BSTAR(c0, c1);
1152  BUCKET_BSTAR(c0, c1) = j; /* end point */
1153  i += BUCKET_B(c0, c1);
1154  }
1155  }
1156 
1157  if(0 < m) {
1158  /* Sort the type B* suffixes by their first two characters. */
1159  SAIterator_type PAb = SA + SAsize - m, ISAb = SA + m;
1160  for(i = m - 2; 0 <= i; --i) {
1161  t = PAb[i], c0 = T[t], c1 = T[t + 1];
1162  SA[--BUCKET_BSTAR(c0, c1)] = i;
1163  }
1164  t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1165  SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1166 
1167  /* Sort the type B* substrings using sssort. */
1168  bufsize = SAsize - 2 * m;
1169  if(MERGE_BUFSIZE < bufsize) {
1170  SAIterator_type buf = SA + m;
1171  for(c0 = alphabetsize - 1, j = m; 0 < j; --c0) {
1172  for(c1 = alphabetsize - 1; c0 < c1; j = i, --c1) {
1173  i = BUCKET_BSTAR(c0, c1);
1174  if(1 < (j - i)) {
1175  substring::sort(T, PAb, SA + i, SA + j, buf, bufsize, 2, n, *(SA + i) == (m - 1));
1176  }
1177  }
1178  }
1179  } else {
1180  pos_type *lbuf = NULL;
1181  int err = 0;
1182  try {
1183  lbuf = new pos_type[MERGE_BUFSIZE]; if(lbuf == NULL) { throw; }
1184  for(c0 = alphabetsize - 1, j = m; 0 < j; --c0) {
1185  for(c1 = alphabetsize - 1; c0 < c1; j = i, --c1) {
1186  i = BUCKET_BSTAR(c0, c1);
1187  if(1 < (j - i)) {
1188  substring::sort(T, PAb, SA + i, SA + j, lbuf, MERGE_BUFSIZE, 2, n, *(SA + i) == (m - 1));
1189  }
1190  }
1191  }
1192  } catch(...) {
1193  err = -1;
1194  }
1195  delete[] lbuf;
1196  if(err != 0) { throw; }
1197  }
1198 
1199  /* Compute ranks of type B* substrings. */
1200  for(i = m - 1; 0 <= i; --i) {
1201  if(0 <= SA[i]) {
1202  j = i;
1203  do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1204  SA[i + 1] = i - j;
1205  if(i <= 0) { break; }
1206  }
1207  j = i;
1208  do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1209  ISAb[SA[i]] = j;
1210  }
1211 
1212  /* Construct the inverse suffix array of type B* suffixes using tandemrepeat/doubling sorting algorithms. */
1213  if(tandemrepeat::sort(ISAb, SA, SA + m) == false) {
1214  doubling::sort(ISAb, SA, SA + m);
1215  }
1216 
1217  /* Set the sorted order of tyoe B* suffixes. */
1218  for(i = n - 1, j = m; 0 <= i;) {
1219  for(--i; (0 <= i) && (T[i] >= T[i + 1]); --i) { }
1220  if(0 <= i) {
1221  SA[ISAb[--j]] = i;
1222  for(--i; (0 <= i) && (T[i] <= T[i + 1]); --i) { }
1223  }
1224  }
1225 
1226  /* Calculate the index of start/end point of each bucket. */
1227  for(c0 = alphabetsize - 1, i = n - 1, k = m - 1; 0 <= c0; --c0) {
1228  for(c1 = alphabetsize - 1; c0 < c1; --c1) {
1229  t = i - BUCKET_B(c0, c1);
1230  BUCKET_B(c0, c1) = i + 1; /* end point */
1231 
1232  /* Move all type B* suffixes to the correct position. */
1233  for(i = t, j = BUCKET_BSTAR(c0, c1);
1234  j <= k;
1235  --i, --k) { SA[i] = SA[k]; }
1236  }
1237  t = i - BUCKET_B(c0, c0);
1238  BUCKET_B(c0, c0) = i + 1; /* end point */
1239  if(c0 < (alphabetsize - 1)) {
1240  BUCKET_BSTAR(c0, c0 + 1) = t + 1; /* start point */
1241  }
1242  i = BUCKET_A(c0);
1243  }
1244  }
1245 
1246  return m;
1247 }
1248 
1249 /* Constructs the suffix array by using the sorted order of type B* suffixes. */
1250 template<typename StringIterator_type, typename SAIterator_type, typename pos_type, typename alphabetsize_type>
1251 void
1252 constructSA_from_typeBstar(const StringIterator_type T, SAIterator_type SA,
1253  pos_type *bucket_A, pos_type *bucket_B,
1254  pos_type n, pos_type m, alphabetsize_type alphabetsize) {
1255 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
1256  SAIterator_type i, j, t = SA;
1257  pos_type s;
1258  alphabetsize_type c0, c1, c2;
1259 
1262  if(0 < m) {
1263  /* Construct the sorted order of type B suffixes by using
1264  the sorted order of type B* suffixes. */
1265  for(c1 = alphabetsize - 2; 0 <= c1; --c1) {
1266  /* Scan the suffix array from right to left. */
1267  for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1268  j = SA + (BUCKET_A(c1 + 1) + 1),
1269  c2 = -1;
1270  i < j;) {
1271  if(0 <= (s = *--j)) {
1272  if((0 <= --s) && ((c0 = T[s]) <= c1)) {
1273  *j = ~(s + 1);
1274  if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1275  if(c2 == c0) { *--t = s; }
1276  else {
1277  if(0 <= c2) { BUCKET_B(c2, c1) = t - SA; }
1278  *(t = SA + BUCKET_B(c2 = c0, c1) - 1) = s;
1279  }
1280  }
1281  } else {
1282  *j = ~s;
1283  }
1284  }
1285  }
1286  }
1287 
1288  /* Construct the suffix array by using
1289  the sorted order of type B suffixes. */
1290  *(t = SA + (BUCKET_A(c2 = T[n - 1]) + 1)) = n - 1;
1291  /* Scan the suffix array from left to right. */
1292  for(i = SA, j = SA + n; i < j; ++i) {
1293  if(0 <= (s = *i)) {
1294  if((0 <= --s) && ((c0 = T[s]) >= T[s + 1])) {
1295  if((0 < s) && (T[s - 1] < c0)) { s = ~s; }
1296  if(c0 == c2) { *++t = s; }
1297  else {
1298  BUCKET_A(c2) = t - SA;
1299  *(t = SA + (BUCKET_A(c2 = c0) + 1)) = s;
1300  }
1301  }
1302  } else {
1303  *i = ~s;
1304  }
1305  }
1306 }
1307 
1308 
1309 /* Constructs the burrows-wheeler transformed string directly
1310  by using the sorted order of type B* suffixes. */
1311 template<typename StringIterator_type, typename SAIterator_type, typename pos_type, typename alphabetsize_type>
1312 SAIterator_type
1313 constructBWT_from_typeBstar(const StringIterator_type T, SAIterator_type SA,
1314  pos_type *bucket_A, pos_type *bucket_B,
1315  pos_type n, pos_type m, alphabetsize_type alphabetsize) {
1316 typedef typename std::iterator_traits<StringIterator_type>::value_type value_type;
1317  SAIterator_type i, j, t = SA, orig;
1318  pos_type s;
1319  alphabetsize_type c0, c1, c2;
1320 
1323  if(0 < m) {
1324  /* Construct the sorted order of type B suffixes by using
1325  the sorted order of type B* suffixes. */
1326  for(c1 = alphabetsize - 2; 0 <= c1; --c1) {
1327  /* Scan the suffix array from right to left. */
1328  for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1329  j = SA + (BUCKET_A(c1 + 1) + 1),
1330  c2 = -1;
1331  i < j;) {
1332  if(0 <= (s = *--j)) {
1333  if((0 <= --s) && ((c0 = T[s]) <= c1)) {
1334  *j = ~((pos_type)c0);
1335  if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1336  if(c0 == c2) { *--t = s; }
1337  else {
1338  if(0 <= c2) { BUCKET_B(c2, c1) = pos_type( t - SA ); }
1339  *(t = SA + BUCKET_B(c2 = c0, c1) - 1) = s;
1340  }
1341  }
1342  } else {
1343  *j = ~s;
1344  }
1345  }
1346  }
1347  }
1348 
1349  /* Construct the BWTed string by using
1350  the sorted order of type B suffixes. */
1351  c0 = T[s = n - 1];
1352  if(T[s - 1] < c0) { s = ~((pos_type)T[s - 1]); }
1353  *(t = SA + (BUCKET_A(c2 = c0) + 1)) = s;
1354  /* Scan the suffix array from left to right. */
1355  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1356  if(0 <= (s = *i)) {
1357  if((0 <= --s) && ((c0 = T[s]) >= T[s + 1])) {
1358  *i = c0;
1359  if((0 < s) && (T[s - 1] < c0)) { s = ~((pos_type)T[s - 1]); }
1360  if(c0 == c2) { *++t = s; }
1361  else {
1362  BUCKET_A(c2) = pos_type( t - SA );
1363  *(t = SA + (BUCKET_A(c2 = c0) + 1)) = s;
1364  }
1365  } else if(s < 0) { orig = i; }
1366  } else {
1367  *i = ~s;
1368  }
1369  }
1370 
1371  return orig;
1372 }
1373 
1374 #undef BUCKET_A
1375 #undef BUCKET_B
1376 #undef BUCKET_BSTAR
1377 #undef TYPEB_START
1378 #undef TYPEB_END
1379 
1380 } /* namespace core */
1381 
1382 /*---------------------------------------------------------------------------*/
1383 
1384 
1395 template<typename StringIterator_type, typename SAIterator_type, typename alphabetsize_type>
1396 int
1397 constructSA(const StringIterator_type T, const StringIterator_type T_last,
1398  SAIterator_type SA, SAIterator_type SA_last,
1399  alphabetsize_type alphabetsize = 256) {
1400 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
1401  pos_type *bucket_A = NULL, *bucket_B = NULL;
1402  pos_type n = T_last - T, m = SA_last - SA;
1403  int err = 0;
1404 
1405  /* Check arguments. */
1406  if((n < 0) || (m < n)) { return -1; }
1407  else if(n == 0) { return 0; }
1408  else if(n == 1) { *SA = 0; return 0; }
1409 
1410  try {
1411  bucket_A = new pos_type[alphabetsize]; if(bucket_A == NULL) { throw; }
1412  bucket_B = new pos_type[alphabetsize * alphabetsize]; if(bucket_B == NULL) { throw; }
1413  m = core::sort_typeBstar(T, SA, bucket_A, bucket_B, n, m, alphabetsize);
1414  core::constructSA_from_typeBstar(T, SA, bucket_A, bucket_B, n, m, alphabetsize);
1415  } catch(...) {
1416  err = -2;
1417  }
1418 
1419  delete [] bucket_B;
1420  delete [] bucket_A;
1421 
1422  return err;
1423 }
1424 
1425 template<typename StringIterator_type, typename SAIterator_type, typename alphabetsize_type>
1426 int
1427 constructSA(const StringIterator_type T, SAIterator_type SA,
1428  typename std::iterator_traits<SAIterator_type>::value_type n,
1429  alphabetsize_type alphabetsize = 256) {
1430  return constructSA(T, T + n, SA, SA + n, alphabetsize);
1431 }
1432 
1445 template<typename StringIterator1_type, typename StringIterator2_type,
1446  typename SAIterator_type, typename alphabetsize_type>
1447 typename std::iterator_traits<SAIterator_type>::value_type
1448 constructBWT(const StringIterator1_type T, const StringIterator1_type T_last,
1449  StringIterator2_type U, StringIterator2_type U_last,
1450  SAIterator_type SA, SAIterator_type SA_last,
1451  alphabetsize_type alphabetsize = 256) {
1452 typedef typename std::iterator_traits<StringIterator2_type>::value_type value_type;
1453 typedef typename std::iterator_traits<SAIterator_type>::value_type pos_type;
1454  SAIterator_type piter, i, k;
1455  StringIterator2_type j;
1456  pos_type *bucket_A = NULL, *bucket_B = NULL;
1457  pos_type n = pos_type( T_last - T ), m = pos_type( SA_last - SA ), u = pos_type( U_last - U ), pidx = -1;
1458 
1459  /* Check arguments. */
1460  if((n < pos_type(0)) || (m < n)) { return -1; }
1461  else if(n == 0) { return 0; }
1462  else if(n == 1) { if(0 < u) { U[0] = T[0]; } return 1; }
1463 
1464  try {
1465  bucket_A = new pos_type[alphabetsize]; if(bucket_A == NULL) { throw; }
1466  bucket_B = new pos_type[alphabetsize * alphabetsize]; if(bucket_B == NULL) { throw; }
1467  m = core::sort_typeBstar(T, SA, bucket_A, bucket_B, n, m, alphabetsize);
1468  piter = core::constructBWT_from_typeBstar(T, SA, bucket_A, bucket_B, n, m, alphabetsize);
1469  pidx = pos_type( piter - SA + 1 );
1470  if(0 < u) {
1471  U[pos_type(0)] = T[n - 1];
1472  if(pidx <= u) {
1473  for(i = SA, j = U + 1; i < piter; ++i, ++j) { *j = value_type(*i); }
1474  if(n <= u) { for(i += 1; i < SA_last; ++i, ++j) { *j = value_type(*i); } }
1475  else { for(i += 1, k = SA + u - 1; i < k; ++i, ++j) { *j = value_type(*i); } }
1476  } else {
1477  for(i = SA, j = U + 1, k = SA + u - 1; i < k; ++i, ++j) { *j = value_type(*i); }
1478  }
1479  }
1480  } catch(...) {
1481  pidx = -2;
1482  }
1483 
1484  delete [] bucket_B;
1485  delete [] bucket_A;
1486 
1487  return pidx;
1488 }
1489 
1490 template<typename pos_type, typename StringIterator1_type, typename StringIterator2_type, typename alphabetsize_type>
1491 pos_type
1492 constructBWT(const StringIterator1_type T, const StringIterator1_type T_last,
1493  StringIterator2_type U, StringIterator2_type U_last,
1494  alphabetsize_type alphabetsize = 256) {
1495  pos_type *SA = NULL;
1496  pos_type n = T_last - T, pidx = -1;
1497 
1498  /* Check arguments. */
1499  if(n < 0) { return -1; }
1500  else if(n == 0) { return 0; }
1501  else if(n == 1) { if(U < U_last) { U[pos_type(0)] = T[pos_type(0)]; } return 1; }
1502 
1503  try {
1504  SA = new pos_type[n]; if(SA == NULL) { throw; }
1505  pidx = constructBWT(T, T_last, U, U_last, SA, SA + n, alphabetsize);
1506  } catch(...) {
1507  pidx = -2;
1508  }
1509  delete [] SA;
1510 
1511  return pidx;
1512 }
1513 
1514 template<typename pos_type, typename StringIterator_type, typename alphabetsize_type>
1515 pos_type
1516 constructBWT(StringIterator_type T, StringIterator_type T_last,
1517  alphabetsize_type alphabetsize = 256) {
1518  return constructBWT<pos_type>(T, T_last, T, T_last, alphabetsize);
1519 }
1520 
1521 template<typename StringIterator1_type, typename StringIterator2_type,
1522  typename SAIterator_type, typename alphabetsize_type>
1523 typename std::iterator_traits<SAIterator_type>::value_type
1524 constructBWT(const StringIterator1_type T, StringIterator2_type U, SAIterator_type SA,
1525  typename std::iterator_traits<SAIterator_type>::value_type n,
1526  alphabetsize_type alphabetsize = 256) {
1527  return constructBWT(T, T + n, U, U + n, SA, SA + n, alphabetsize);
1528 }
1529 
1530 template<typename pos_type, typename StringIterator1_type, typename StringIterator2_type, typename alphabetsize_type>
1531 pos_type
1532 constructBWT(const StringIterator1_type T, StringIterator2_type U,
1533  pos_type n, alphabetsize_type alphabetsize = 256) {
1534  return constructBWT<pos_type>(T, T + n, U, U + n, alphabetsize);
1535 }
1536 
1537 template<typename pos_type, typename StringIterator_type, typename alphabetsize_type>
1538 pos_type
1539 constructBWT(StringIterator_type T, pos_type n, alphabetsize_type alphabetsize = 256) {
1540  return constructBWT<pos_type>(T, T + n, alphabetsize);
1541 }
1542 
1543 
1544 } /* namespace divsufsortxx */
1545 
1546 #endif /* _DIVSUFSORTXX_H_ */