29#pragma clang diagnostic ignored "-Wshorten-64-to-32"
33# pragma warning(disable : 4244)
34# pragma warning(disable : 4127)
50# define INLINE __inline
52#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
55#if !defined(ALPHABET_SIZE)
56# define ALPHABET_SIZE (256)
58#define BUCKET_A_SIZE (ALPHABET_SIZE)
59#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
60#if defined(SS_INSERTIONSORT_THRESHOLD)
61# if SS_INSERTIONSORT_THRESHOLD < 1
62# undef SS_INSERTIONSORT_THRESHOLD
63# define SS_INSERTIONSORT_THRESHOLD (1)
66# define SS_INSERTIONSORT_THRESHOLD (8)
68#if defined(SS_BLOCKSIZE)
71# define SS_BLOCKSIZE (0)
72# elif 32768 <= SS_BLOCKSIZE
74# define SS_BLOCKSIZE (32767)
77# define SS_BLOCKSIZE (1024)
81# define SS_MISORT_STACKSIZE (96)
82#elif SS_BLOCKSIZE <= 4096
83# define SS_MISORT_STACKSIZE (16)
85# define SS_MISORT_STACKSIZE (24)
87#define SS_SMERGE_STACKSIZE (32)
88#define TR_INSERTIONSORT_THRESHOLD (8)
89#define TR_STACKSIZE (64)
94# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
97# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
100# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
102#define STACK_PUSH(_a, _b, _c, _d)\
104 assert(ssize < STACK_SIZE);\
105 stack[ssize].a = (_a), stack[ssize].b = (_b),\
106 stack[ssize].c = (_c), stack[ssize++].d = (_d);\
108#define STACK_PUSH5(_a, _b, _c, _d, _e)\
110 assert(ssize < STACK_SIZE);\
111 stack[ssize].a = (_a), stack[ssize].b = (_b),\
112 stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
114#define STACK_POP(_a, _b, _c, _d)\
117 if(ssize == 0) { return; }\
118 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
119 (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
121#define STACK_POP5(_a, _b, _c, _d, _e)\
124 if(ssize == 0) { return; }\
125 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
126 (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
128#define BUCKET_A(_c0) bucket_A[(_c0)]
129#if ALPHABET_SIZE == 256
130#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
131#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
133#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
134#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
140static const int lg_table[256]= {
141 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
142 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
143 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
144 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
145 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
146 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
147 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
148 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
151#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
157 return (n & 0xffff0000) ?
159 24 + lg_table[(n >> 24) & 0xff] :
160 16 + lg_table[(
n >> 16) & 0xff]) :
162 8 + lg_table[(n >> 8) & 0xff] :
163 0 + lg_table[(
n >> 0) & 0xff]);
164#elif SS_BLOCKSIZE < 256
167 return (n & 0xff00) ?
168 8 + lg_table[(
n >> 8) & 0xff] :
169 0 + lg_table[(n >> 0) & 0xff];
177static const int sqq_table[256] = {
178 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
179 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
180 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
181110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
182128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
183143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
184156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
185169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
186181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
187192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
188202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
189212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
190221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
191230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
192239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
193247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
202 e = (
x & 0xffff0000) ?
204 24 + lg_table[(
x >> 24) & 0xff] :
205 16 + lg_table[(x >> 16) & 0xff]) :
207 8 + lg_table[(
x >> 8) & 0xff] :
208 0 + lg_table[(x >> 0) & 0xff]);
211 y = sqq_table[
x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
212 if(e >= 24) {
y = (
y + 1 +
x /
y) >> 1; }
213 y = (
y + 1 +
x /
y) >> 1;
215 y = (sqq_table[
x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
217 return sqq_table[
x] >> 4;
220 return (x < (y * y)) ? y - 1 : y;
231ss_compare(
const unsigned char *T,
232 const int *p1,
const int *p2,
234 const unsigned char *U1, *U2, *U1n, *U2n;
236 for(U1 = T + depth + *p1,
237 U2 = T + depth + *p2,
238 U1n = T + *(p1 + 1) + 2,
239 U2n = T + *(p2 + 1) + 2;
240 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
245 (U2 < U2n ? *U1 - *U2 : 1) :
252#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
257ss_insertionsort(
const unsigned char *T,
const int *PA,
258 int *first,
int *last,
int depth) {
263 for(i = last - 2;
first <= i; --i) {
264 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
265 do { *(j - 1) = *j; }
while((++j < last) && (*j < 0));
266 if(last <= j) {
break; }
268 if(r == 0) { *j = ~*j; }
278#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
282ss_fixdown(
const unsigned char *Td,
const int *PA,
283 int *
SA,
int i,
int size) {
288 for(v =
SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size;
SA[i] =
SA[k], i = k) {
289 d = Td[PA[
SA[k = j++]]];
290 if(d < (e = Td[PA[
SA[j]]])) { k = j;
d = e; }
291 if(d <= c) {
break; }
299ss_heapsort(
const unsigned char *Td,
const int *PA,
int *
SA,
int size) {
304 if((size % 2) == 0) {
306 if(Td[PA[
SA[m / 2]]] < Td[PA[
SA[m]]]) {
SWAP(
SA[m],
SA[m / 2]); }
309 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA,
SA, i, m); }
310 if((size % 2) == 0) {
SWAP(
SA[0],
SA[m]); ss_fixdown(Td, PA,
SA, 0, m); }
311 for(i = m - 1; 0 < i; --i) {
313 ss_fixdown(Td, PA,
SA, 0, i);
324ss_median3(
const unsigned char *Td,
const int *PA,
325 int *v1,
int *v2,
int *v3) {
327 if(Td[PA[*v1]] > Td[PA[*v2]]) {
SWAP(v1, v2); }
328 if(Td[PA[*v2]] > Td[PA[*v3]]) {
329 if(Td[PA[*v1]] > Td[PA[*v3]]) {
return v1; }
338ss_median5(
const unsigned char *Td,
const int *PA,
339 int *v1,
int *v2,
int *v3,
int *v4,
int *v5) {
341 if(Td[PA[*v2]] > Td[PA[*v3]]) {
SWAP(v2, v3); }
342 if(Td[PA[*v4]] > Td[PA[*v5]]) {
SWAP(v4, v5); }
343 if(Td[PA[*v2]] > Td[PA[*v4]]) {
SWAP(v2, v4);
SWAP(v3, v5); }
344 if(Td[PA[*v1]] > Td[PA[*v3]]) {
SWAP(v1, v3); }
345 if(Td[PA[*v1]] > Td[PA[*v4]]) {
SWAP(v1, v4);
SWAP(v3, v5); }
346 if(Td[PA[*v3]] > Td[PA[*v4]]) {
return v4; }
353ss_pivot(
const unsigned char *Td,
const int *PA,
int *first,
int *last) {
362 return ss_median3(Td, PA, first, middle, last - 1);
365 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
369 first = ss_median3(Td, PA, first, first + t, first + (t << 1));
370 middle = ss_median3(Td, PA, middle - t, middle, middle + t);
371 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
372 return ss_median3(Td, PA, first, middle, last);
381ss_partition(
const int *PA,
382 int *first,
int *last,
int depth) {
385 for(a = first - 1, b = last;;) {
386 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
387 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
388 if(b <= a) {
break; }
400ss_mintrosort(
const unsigned char *T,
const int *PA,
401 int *first,
int *last,
403#define STACK_SIZE SS_MISORT_STACKSIZE
404 struct {
int *a, *b, c;
int d; } stack[
STACK_SIZE];
405 const unsigned char *Td;
406 int *a, *b, *c, *
d, *e, *f;
412 for(ssize = 0, limit = ss_ilg(last - first);;) {
415#if 1 < SS_INSERTIONSORT_THRESHOLD
416 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
423 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
425 for(a = first + 1, v = Td[PA[*first]]; a <
last; ++a) {
426 if((x = Td[PA[*a]]) != v) {
427 if(1 < (a - first)) {
break; }
432 if(Td[PA[*first] - 1] < v) {
433 first = ss_partition(PA, first, a, depth);
435 if((a - first) <= (last - a)) {
436 if(1 < (a - first)) {
438 last = a, depth += 1,
limit = ss_ilg(a - first);
444 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
447 last = a, depth += 1,
limit = ss_ilg(a - first);
454 a = ss_pivot(Td, PA, first, last);
459 for(b = first; (++b <
last) && ((x = Td[PA[*b]]) == v);) { }
460 if(((a = b) < last) && (
x < v)) {
461 for(; (++b <
last) && ((x = Td[PA[*b]]) <= v);) {
462 if(x == v) {
SWAP(*b, *a); ++a; }
465 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
466 if((b < (d = c)) && (
x > v)) {
467 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
468 if(x == v) {
SWAP(*c, *d); --
d; }
473 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
474 if(x == v) {
SWAP(*b, *a); ++a; }
476 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
477 if(x == v) {
SWAP(*c, *d); --
d; }
484 if((s = a - first) > (t = b - a)) { s = t; }
485 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) {
SWAP(*e, *f); }
486 if((s = d - c) > (t = last - d - 1)) { s = t; }
487 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) {
SWAP(*e, *f); }
489 a =
first + (b - a), c = last - (d - c);
490 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
492 if((a - first) <= (
last - c)) {
493 if((last - c) <= (c - b)) {
497 }
else if((a - first) <= (c - b)) {
507 if((a - first) <= (c - b)) {
511 }
else if((last - c) <= (c - b)) {
523 if(Td[PA[*first] - 1] < v) {
524 first = ss_partition(PA, first, last, depth);
525 limit = ss_ilg(last - first);
542ss_blockswap(
int *a,
int *b,
int n) {
544 for(; 0 <
n; --
n, ++a, ++b) {
545 t = *a, *a = *b, *b = t;
551ss_rotate(
int *first,
int *middle,
int *last) {
555 for(; (0 < l) && (0 < r);) {
556 if(l == r) { ss_blockswap(first, middle, l);
break; }
561 *a-- = *b, *b-- = *a;
565 if((r -= l + 1) <= l) {
break; }
574 *a++ = *b, *b++ = *a;
578 if((l -= r + 1) <= r) {
break; }
592ss_inplacemerge(
const unsigned char *T,
const int *PA,
593 int *first,
int *middle,
int *last,
602 if(*(last - 1) < 0) {
x = 1; p = PA + ~*(
last - 1); }
603 else {
x = 0; p = PA + *(
last - 1); }
604 for(a = first, len = middle - first, half = len >> 1, r = -1;
606 len = half, half >>= 1) {
608 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
611 half -= (len & 1) ^ 1;
617 if(r == 0) { *a = ~*a; }
618 ss_rotate(a, middle, last);
621 if(first == middle) {
break; }
624 if(x != 0) {
while(*--last < 0) { } }
625 if(middle == last) {
break; }
635ss_mergeforward(
const unsigned char *T,
const int *PA,
636 int *first,
int *middle,
int *last,
637 int *buf,
int depth) {
638 int *a, *b, *c, *bufend;
643 ss_blockswap(buf, first, middle - first);
645 for(t = *(a = first), b = buf, c = middle;;) {
646 r = ss_compare(T, PA + *b, PA + *c, depth);
650 if(bufend <= b) { *bufend = t;
return; }
655 *a++ = *c, *c++ = *a;
657 while(b < bufend) { *a++ = *b, *b++ = *a; }
666 if(bufend <= b) { *bufend = t;
return; }
671 *a++ = *c, *c++ = *a;
673 while(b < bufend) { *a++ = *b, *b++ = *a; }
685ss_mergebackward(
const unsigned char *T,
const int *PA,
686 int *first,
int *middle,
int *last,
687 int *buf,
int depth) {
689 int *a, *b, *c, *bufend;
695 ss_blockswap(buf, middle, last - middle);
698 if(*bufend < 0) { p1 = PA + ~*bufend;
x |= 1; }
699 else { p1 = PA + *bufend; }
700 if(*(middle - 1) < 0) { p2 = PA + ~*(
middle - 1);
x |= 2; }
701 else { p2 = PA + *(
middle - 1); }
702 for(t = *(a = last - 1), b = bufend, c =
middle - 1;;) {
703 r = ss_compare(T, p1, p2, depth);
705 if(x & 1) {
do { *a-- = *b, *b-- = *a; }
while(*b < 0);
x ^= 1; }
707 if(b <= buf) { *buf = t;
break; }
709 if(*b < 0) { p1 = PA + ~*b;
x |= 1; }
710 else { p1 = PA + *b; }
712 if(x & 2) {
do { *a-- = *c, *c-- = *a; }
while(*c < 0);
x ^= 2; }
713 *a-- = *c, *c-- = *a;
715 while(buf < b) { *a-- = *b, *b-- = *a; }
719 if(*c < 0) { p2 = PA + ~*c;
x |= 2; }
720 else { p2 = PA + *c; }
722 if(x & 1) {
do { *a-- = *b, *b-- = *a; }
while(*b < 0);
x ^= 1; }
724 if(b <= buf) { *buf = t;
break; }
726 if(x & 2) {
do { *a-- = *c, *c-- = *a; }
while(*c < 0);
x ^= 2; }
727 *a-- = *c, *c-- = *a;
729 while(buf < b) { *a-- = *b, *b-- = *a; }
733 if(*b < 0) { p1 = PA + ~*b;
x |= 1; }
734 else { p1 = PA + *b; }
735 if(*c < 0) { p2 = PA + ~*c;
x |= 2; }
736 else { p2 = PA + *c; }
744ss_swapmerge(
const unsigned char *T,
const int *PA,
745 int *first,
int *middle,
int *last,
746 int *buf,
int bufsize,
int depth) {
747#define STACK_SIZE SS_SMERGE_STACKSIZE
748#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
749#define MERGE_CHECK(a, b, c)\
752 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
755 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
759 struct {
int *a, *b, *c;
int d; } stack[
STACK_SIZE];
760 int *l, *r, *lm, *rm;
765 for(check = 0, ssize = 0;;) {
766 if((last - middle) <= bufsize) {
767 if((first < middle) && (middle < last)) {
768 ss_mergebackward(T, PA, first, middle, last, buf, depth);
775 if((middle - first) <= bufsize) {
777 ss_mergeforward(T, PA, first, middle, last, buf, depth);
784 for(m = 0, len =
MIN(middle - first, last - middle), half = len >> 1;
786 len = half, half >>= 1) {
787 if(ss_compare(T, PA +
GETIDX(*(middle + m + half)),
788 PA +
GETIDX(*(middle - m - half - 1)), depth) < 0) {
790 half -= (len & 1) ^ 1;
796 ss_blockswap(lm, middle, m);
801 if(first < lm) {
for(; *--l < 0;) { } next |= 4; }
803 }
else if(first < lm) {
804 for(; *r < 0; ++r) { }
809 if((l - first) <= (last - r)) {
810 STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
813 if((next & 2) && (r ==
middle)) { next ^= 6; }
814 STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
818 if(ss_compare(T, PA +
GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
836sssort(
const unsigned char *T,
const int *PA,
837 int *first,
int *last,
838 int *buf,
int bufsize,
839 int depth,
int n,
int lastsuffix) {
843 int j, k, curbufsize,
limit;
847 if(lastsuffix != 0) { ++
first; }
850 ss_mintrosort(T, PA, first, last, depth);
853 (bufsize < (last - first)) &&
854 (bufsize < (limit = ss_isqrt(last - first)))) {
861#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
863#elif 1 < SS_BLOCKSIZE
868 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
869 for(b = a, k =
SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
870 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
873#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
874 ss_mintrosort(T, PA, a, middle, depth);
875#elif 1 < SS_BLOCKSIZE
876 ss_insertionsort(T, PA, a, middle, depth);
880 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
885#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
886 ss_mintrosort(T, PA, middle, last, depth);
887#elif 1 < SS_BLOCKSIZE
888 ss_insertionsort(T, PA, middle, last, depth);
890 ss_inplacemerge(T, PA, first, middle, last, depth);
894 if(lastsuffix != 0) {
896 int PAi[2]; PAi[0] = PA[*(
first - 1)], PAi[1] = n - 2;
897 for(a = first, i = *(first - 1);
898 (a <
last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
912 return (n & 0xffff0000) ?
914 24 + lg_table[(n >> 24) & 0xff] :
915 16 + lg_table[(
n >> 16) & 0xff]) :
917 8 + lg_table[(n >> 8) & 0xff] :
918 0 + lg_table[(
n >> 0) & 0xff]);
927tr_insertionsort(
const int *ISAd,
int *first,
int *last) {
931 for(a = first + 1; a <
last; ++a) {
932 for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
933 do { *(b + 1) = *b; }
while((first <= --b) && (*b < 0));
934 if(b < first) {
break; }
936 if(r == 0) { *b = ~*b; }
946tr_fixdown(
const int *ISAd,
int *
SA,
int i,
int size) {
951 for(v =
SA[i], c = ISAd[v]; (j = 2 * i + 1) < size;
SA[i] =
SA[k], i = k) {
952 d = ISAd[
SA[k = j++]];
953 if(d < (e = ISAd[
SA[j]])) { k = j;
d = e; }
954 if(d <= c) {
break; }
962tr_heapsort(
const int *ISAd,
int *
SA,
int size) {
967 if((size % 2) == 0) {
969 if(ISAd[
SA[m / 2]] < ISAd[
SA[m]]) {
SWAP(
SA[m],
SA[m / 2]); }
972 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd,
SA, i, m); }
973 if((size % 2) == 0) {
SWAP(
SA[0],
SA[m]); tr_fixdown(ISAd,
SA, 0, m); }
974 for(i = m - 1; 0 < i; --i) {
976 tr_fixdown(ISAd,
SA, 0, i);
987tr_median3(
const int *ISAd,
int *v1,
int *v2,
int *v3) {
989 if(ISAd[*v1] > ISAd[*v2]) {
SWAP(v1, v2); }
990 if(ISAd[*v2] > ISAd[*v3]) {
991 if(ISAd[*v1] > ISAd[*v3]) {
return v1; }
1000tr_median5(
const int *ISAd,
1001 int *v1,
int *v2,
int *v3,
int *v4,
int *v5) {
1003 if(ISAd[*v2] > ISAd[*v3]) {
SWAP(v2, v3); }
1004 if(ISAd[*v4] > ISAd[*v5]) {
SWAP(v4, v5); }
1005 if(ISAd[*v2] > ISAd[*v4]) {
SWAP(v2, v4);
SWAP(v3, v5); }
1006 if(ISAd[*v1] > ISAd[*v3]) {
SWAP(v1, v3); }
1007 if(ISAd[*v1] > ISAd[*v4]) {
SWAP(v1, v4);
SWAP(v3, v5); }
1008 if(ISAd[*v3] > ISAd[*v4]) {
return v4; }
1015tr_pivot(
const int *ISAd,
int *first,
int *last) {
1024 return tr_median3(ISAd, first, middle, last - 1);
1027 return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1031 first = tr_median3(ISAd, first, first + t, first + (t << 1));
1032 middle = tr_median3(ISAd, middle - t, middle, middle + t);
1033 last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1034 return tr_median3(ISAd, first, middle, last);
1050trbudget_init(
trbudget_t *budget,
int chance,
int incval) {
1057trbudget_check(
trbudget_t *budget,
int size) {
1058 if(size <= budget->remain) { budget->
remain -=
size;
return 1; }
1070tr_partition(
const int *ISAd,
1071 int *first,
int *middle,
int *last,
1072 int **pa,
int **pb,
int v) {
1073 int *a, *b, *c, *
d, *e, *f;
1077 for(b = middle - 1; (++b <
last) && ((x = ISAd[*b]) == v);) { }
1078 if(((a = b) < last) && (
x < v)) {
1079 for(; (++b <
last) && ((x = ISAd[*b]) <= v);) {
1080 if(x == v) {
SWAP(*b, *a); ++a; }
1083 for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1084 if((b < (d = c)) && (
x > v)) {
1085 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1086 if(x == v) {
SWAP(*c, *d); --
d; }
1091 for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1092 if(x == v) {
SWAP(*b, *a); ++a; }
1094 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1095 if(x == v) {
SWAP(*c, *d); --
d; }
1101 if((s = a - first) > (t = b - a)) { s = t; }
1102 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) {
SWAP(*e, *f); }
1103 if((s = d - c) > (t = last - d - 1)) { s = t; }
1104 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) {
SWAP(*e, *f); }
1105 first += (b - a), last -= (d - c);
1112tr_copy(
int *ISA,
const int *
SA,
1113 int *first,
int *a,
int *b,
int *last,
1121 for(c = first, d = a - 1; c <=
d; ++c) {
1122 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1127 for(c = last - 1, e = d + 1, d = b; e <
d; --c) {
1128 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1137tr_partialcopy(
int *ISA,
const int *
SA,
1138 int *first,
int *a,
int *b,
int *last,
1142 int rank, lastrank, newrank = -1;
1146 for(c = first, d = a - 1; c <=
d; ++c) {
1147 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1149 rank = ISA[s + depth];
1150 if(lastrank != rank) { lastrank =
rank; newrank =
d -
SA; }
1156 for(e = d;
first <= e; --e) {
1158 if(lastrank != rank) { lastrank =
rank; newrank = e -
SA; }
1159 if(newrank != rank) { ISA[*e] = newrank; }
1163 for(c = last - 1, e = d + 1, d = b; e <
d; --c) {
1164 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1166 rank = ISA[s + depth];
1167 if(lastrank != rank) { lastrank =
rank; newrank =
d -
SA; }
1175tr_introsort(
int *ISA,
const int *ISAd,
1176 int *
SA,
int *first,
int *last,
1178#define STACK_SIZE TR_STACKSIZE
1179 struct {
const int *a;
int *b, *c;
int d, e; }stack[
STACK_SIZE];
1183 int incr = ISAd - ISA;
1185 int ssize, trlink = -1;
1187 for(ssize = 0, limit = tr_ilg(last - first);;) {
1192 tr_partition(ISAd - incr, first, first, last, &a, &b, last -
SA - 1);
1196 for(c = first, v = a -
SA - 1; c < a; ++c) { ISA[*c] = v; }
1199 for(c = a, v = b -
SA - 1; c < b; ++c) { ISA[*c] = v; }
1205 STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1208 if((a - first) <= (last - b)) {
1209 if(1 < (a - first)) {
1210 STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1212 }
else if(1 < (last - b)) {
1215 STACK_POP5(ISAd, first, last, limit, trlink);
1218 if(1 < (last - b)) {
1219 STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1221 }
else if(1 < (a - first)) {
1224 STACK_POP5(ISAd, first, last, limit, trlink);
1227 }
else if(limit == -2) {
1229 a = stack[--ssize].b, b = stack[ssize].c;
1230 if(stack[ssize].d == 0) {
1231 tr_copy(ISA,
SA, first, a, b, last, ISAd - ISA);
1233 if(0 <= trlink) { stack[trlink].d = -1; }
1234 tr_partialcopy(ISA,
SA, first, a, b, last, ISAd - ISA);
1236 STACK_POP5(ISAd, first, last, limit, trlink);
1241 do { ISA[*a] = a -
SA; }
while((++a < last) && (0 <= *a));
1245 a =
first;
do { *a = ~*a; }
while(*++a < 0);
1246 next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1247 if(++a < last) {
for(b = first, v = a -
SA - 1; b < a; ++b) { ISA[*b] = v; } }
1250 if(trbudget_check(budget, a - first)) {
1251 if((a - first) <= (
last - a)) {
1255 if(1 < (last - a)) {
1263 if(0 <= trlink) { stack[trlink].d = -1; }
1264 if(1 < (last - a)) {
1267 STACK_POP5(ISAd, first, last, limit, trlink);
1271 STACK_POP5(ISAd, first, last, limit, trlink);
1278 tr_insertionsort(ISAd, first, last);
1284 tr_heapsort(ISAd, first, last - first);
1285 for(a = last - 1;
first < a; a = b) {
1286 for(x = ISAd[*a], b = a - 1; (
first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1293 a = tr_pivot(ISAd, first, last);
1298 tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1299 if((last - first) != (b - a)) {
1300 next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1303 for(c = first, v = a -
SA - 1; c < a; ++c) { ISA[*c] = v; }
1304 if(b < last) {
for(c = a, v = b -
SA - 1; c < b; ++c) { ISA[*c] = v; } }
1307 if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1308 if((a - first) <= (
last - b)) {
1309 if((last - b) <= (b - a)) {
1310 if(1 < (a - first)) {
1314 }
else if(1 < (last - b)) {
1320 }
else if((a - first) <= (b - a)) {
1321 if(1 < (a - first)) {
1335 if((a - first) <= (b - a)) {
1336 if(1 < (last - b)) {
1340 }
else if(1 < (a - first)) {
1346 }
else if((last - b) <= (b - a)) {
1347 if(1 < (last - b)) {
1362 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1363 if((a - first) <= (last - b)) {
1364 if(1 < (a - first)) {
1367 }
else if(1 < (last - b)) {
1370 STACK_POP5(ISAd, first, last, limit, trlink);
1373 if(1 < (last - b)) {
1376 }
else if(1 < (a - first)) {
1379 STACK_POP5(ISAd, first, last, limit, trlink);
1384 if(trbudget_check(budget, last - first)) {
1385 limit = tr_ilg(last - first), ISAd += incr;
1387 if(0 <= trlink) { stack[trlink].d = -1; }
1388 STACK_POP5(ISAd, first, last, limit, trlink);
1402trsort(
int *ISA,
int *
SA,
int n,
int depth) {
1406 int t, skip, unsorted;
1408 trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1410 for(ISAd = ISA + depth; -
n < *
SA; ISAd += ISAd - ISA) {
1415 if((t = *first) < 0) {
first -= t; skip += t; }
1417 if(skip != 0) { *(
first + skip) = skip; skip = 0; }
1419 if(1 < (last - first)) {
1421 tr_introsort(ISA, ISAd,
SA, first, last, &budget);
1422 if(budget.
count != 0) { unsorted += budget.
count; }
1424 }
else if((last - first) == 1) {
1429 }
while(first < (
SA + n));
1430 if(skip != 0) { *(
first + skip) = skip; }
1431 if(unsorted == 0) {
break; }
1441sort_typeBstar(
const unsigned char *T,
int *
SA,
1442 int *bucket_A,
int *bucket_B,
1443 int n,
int openMP) {
1444 int *PAb, *ISAb, *buf;
1449 int i, j, k, t,
m, bufsize;
1463 for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1465 do { ++
BUCKET_A(c1 = c0); }
while((0 <= --i) && ((c0 = T[i]) >= c1));
1471 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1497 PAb =
SA +
n -
m; ISAb =
SA +
m;
1498 for(i = m - 2; 0 <= i; --i) {
1499 t = PAb[i], c0 =
T[t], c1 =
T[t + 1];
1502 t = PAb[
m - 1], c0 =
T[t], c1 =
T[t + 1];
1511#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
1513 bufsize = (
n - (2 *
m)) / omp_get_num_threads();
1514 curbuf = buf + omp_get_thread_num() * bufsize;
1517 #pragma omp critical(sssort_lock)
1525 if(--d0 < 0) {
break; }
1527 }
while(((l - k) <= 1) && (0 < (l = k)));
1528 c0 = d0, c1 = d1, j = k;
1531 if(l == 0) {
break; }
1532 sssort(T, PAb,
SA + k,
SA + l,
1533 curbuf, bufsize, 2, n, *(
SA + k) == (m - 1));
1539 buf =
SA +
m, bufsize =
n - (2 *
m);
1544 sssort(T, PAb,
SA + i,
SA + j,
1545 buf, bufsize, 2, n, *(
SA + i) == (m - 1));
1551 buf =
SA +
m, bufsize =
n - (2 *
m);
1556 sssort(T, PAb,
SA + i,
SA + j,
1557 buf, bufsize, 2, n, *(
SA + i) == (m - 1));
1564 for(i = m - 1; 0 <= i; --i) {
1567 do { ISAb[
SA[i]] = i; }
while((0 <= --i) && (0 <=
SA[i]));
1569 if(i <= 0) {
break; }
1572 do { ISAb[
SA[i] = ~SA[i]] = j; }
while(
SA[--i] < 0);
1577 trsort(ISAb,
SA, m, 1);
1580 for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1584 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1585 SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1600 --i, --k) {
SA[i] =
SA[k]; }
1613construct_SA(
const unsigned char *T,
int *
SA,
1614 int *bucket_A,
int *bucket_B,
1626 j =
SA +
BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1631 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1632 assert(T[s - 1] <= T[s]);
1635 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1643 assert(((s == 0) && (T[s] == c1)) || (s < 0));
1653 *k++ = (
T[
n - 2] < c2) ? ~(n - 1) : (
n - 1);
1655 for(i =
SA, j =
SA + n; i < j; ++i) {
1657 assert(T[s - 1] >= T[s]);
1659 if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1677construct_BWT(
const unsigned char *T,
int *
SA,
1678 int *bucket_A,
int *bucket_B,
1680 int *i, *j, *k, *orig;
1690 j =
SA +
BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1695 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1696 assert(T[s - 1] <= T[s]);
1699 if((0 < s) && (
T[s - 1] > c0)) { s = ~s; }
1720 *k++ = (
T[
n - 2] < c2) ? ~((
int)
T[
n - 2]) : (n - 1);
1722 for(i =
SA, j =
SA + n, orig =
SA; i < j; ++i) {
1724 assert(T[s - 1] >= T[s]);
1727 if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1748construct_BWT_indexes(
const unsigned char *T,
int *
SA,
1749 int *bucket_A,
int *bucket_B,
1751 unsigned char * num_indexes,
int * indexes) {
1752 int *i, *j, *k, *orig;
1758 mod |= mod >> 1; mod |= mod >> 2;
1759 mod |= mod >> 4; mod |= mod >> 8;
1760 mod |= mod >> 16; mod >>= 1;
1762 *num_indexes = (
unsigned char)((n - 1) / (mod + 1));
1771 j =
SA +
BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1776 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1777 assert(T[s - 1] <= T[s]);
1779 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j -
SA;
1783 if((0 < s) && (
T[s - 1] > c0)) { s = ~s; }
1804 if (T[n - 2] < c2) {
1805 if (((n - 1) & mod) == 0) indexes[(
n - 1) / (mod + 1) - 1] = k -
SA;
1806 *k++ = ~((int)T[n - 2]);
1813 for(i =
SA, j =
SA + n, orig =
SA; i < j; ++i) {
1815 assert(T[s - 1] >= T[s]);
1817 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i -
SA;
1826 if((0 < s) && (T[s - 1] < c0)) {
1827 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k -
SA;
1828 *k++ = ~((int)T[s - 1]);
1848 int *bucket_A, *bucket_B;
1853 if((T == NULL) || (
SA == NULL) || (n < 0)) {
return -1; }
1854 else if(n == 0) {
return 0; }
1855 else if(n == 1) {
SA[0] = 0;
return 0; }
1856 else if(n == 2) { m = (T[0] < T[1]);
SA[m ^ 1] = 0,
SA[m] = 1;
return 0; }
1862 if((bucket_A != NULL) && (bucket_B != NULL)) {
1863 m = sort_typeBstar(T,
SA, bucket_A, bucket_B, n, openMP);
1864 construct_SA(T,
SA, bucket_A, bucket_B, n, m);
1876divbwt(
const unsigned char *T,
unsigned char *U,
int *A,
int n,
unsigned char * num_indexes,
int * indexes,
int openMP) {
1878 int *bucket_A, *bucket_B;
1882 if((T == NULL) || (U == NULL) || (n < 0)) {
return -1; }
1883 else if(n <= 1) {
if(n == 1) { U[0] = T[0]; }
return n; }
1885 if((
B = A) == NULL) {
B = (
int *)malloc((
size_t)(n + 1) *
sizeof(
int)); }
1890 if((
B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1891 m = sort_typeBstar(T,
B, bucket_A, bucket_B, n, openMP);
1893 if (num_indexes == NULL || indexes == NULL) {
1894 pidx = construct_BWT(T,
B, bucket_A, bucket_B, n, m);
1896 pidx = construct_BWT_indexes(T,
B, bucket_A, bucket_B, n, m, num_indexes, indexes);
1901 for(i = 0; i < pidx; ++i) { U[i + 1] = (
unsigned char)
B[i]; }
1902 for(i += 1; i < n; ++i) { U[i] = (
unsigned char)
B[i]; }
1910 if(A == NULL) { free(
B); }
#define assert(condition)
#define STACK_POP5(_a, _b, _c, _d, _e)
#define STACK_PUSH5(_a, _b, _c, _d, _e)
#define TR_INSERTIONSORT_THRESHOLD
#define BUCKET_BSTAR(_c0, _c1)
#define SS_INSERTIONSORT_THRESHOLD
#define STACK_POP(_a, _b, _c, _d)
#define BUCKET_B(_c0, _c1)
int divsufsort(const unsigned char *T, int *SA, int n, int openMP)
#define STACK_PUSH(_a, _b, _c, _d)
#define MERGE_CHECK(a, b, c)
int divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char *num_indexes, int *indexes, int openMP)
uint32_t size(sys::state const &state)