36 namespace libdivsufsort {
42 const saint_t sqq_table[256] = {
43 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
44 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
45 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
46 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
47 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
48 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
49 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
50 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
51 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
52 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
53 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
54 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
55 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
56 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
57 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
58 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
66 if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) {
return SS_BLOCKSIZE; }
67 e = (x & 0xffff0000) ?
69 24 + lg_table[(x >> 24) & 0xff] :
70 16 + lg_table[(x >> 16) & 0xff]) :
72 8 + lg_table[(x >> 8) & 0xff] :
73 0 + lg_table[(x >> 0) & 0xff]);
76 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
77 if(e >= 24) { y = (y + 1 + x / y) >> 1; }
78 y = (y + 1 + x / y) >> 1;
80 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
82 return sqq_table[x] >> 4;
85 return (x < (y * y)) ? y - 1 : y;
94 template<
typename buffer_t1,
typename buffer_t2>
95 inline saint_t ss_compare(
const sauchar_t *T,
96 buffer_t1& B1, saidx_t p1,
97 buffer_t2& B2, saidx_t p2,
99 const sauchar_t *U1, *U2, *U1n, *U2n;
101 for(U1 = T + depth + B1[p1],
102 U2 = T + depth + B2[p2],
103 U1n = T + B1[p1 + 1] + 2,
104 U2n = T + B2[p2 + 1] + 2;
105 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
110 (U2 < U2n ? *U1 - *U2 : 1) :
117 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) 121 template<
typename buffer_t>
122 inline void ss_insertionsort(
const sauchar_t *T, buffer_t& B,
const saidx_t PA,
123 saidx_t first, saidx_t last, saidx_t depth) {
128 for(i = last - 2; first <= i; --i) {
129 for(t = B[i], j = i + 1; 0 < (r = ss_compare(T, B, PA + t, B, PA + B[j], depth));) {
130 do { B[j - 1] = B[j]; }
while((++j < last) && (B[j] < 0));
131 if(last <= j) {
break; }
133 if(r == 0) { B[j] = ~B[j]; }
143 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) 145 template<
typename buffer_t>
146 inline void ss_fixdown(
const sauchar_t *Td, buffer_t& B,
const saidx_t PA,
147 saidx_t
SA, saidx_t i, saidx_t size) {
152 for(v = B[SA + i], c = Td[B[PA + v]]; (j = 2 * i + 1) < size; B[SA + i] = B[SA + k], i = k) {
154 d = Td[B[PA + B[SA + k]]];
155 if(d < (e = Td[B[PA + B[SA + j]]])) { k = j; d = e; }
156 if(d <= c) {
break; }
162 template<
typename buffer_t>
163 inline void ss_heapsort(
const sauchar_t *Td, buffer_t& B,
const saidx_t PA, saidx_t SA, saidx_t size) {
168 if((size % 2) == 0) {
170 if(Td[B[PA + B[SA + m / 2]]] < Td[B[PA + B[SA + m]]]) { SWAP(B[SA + m], B[SA + m / 2]); }
173 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, B, PA, SA, i, m); }
174 if((size % 2) == 0) { SWAP(B[SA + 0], B[SA + m]); ss_fixdown(Td, B, PA, SA, 0, m); }
175 for(i = m - 1; 0 < i; --i) {
176 t = B[SA + 0], B[SA + 0] = B[SA + i];
177 ss_fixdown(Td, B, PA, SA, 0, i);
186 template<
typename buffer_t>
187 inline saidx_t ss_median3(
const sauchar_t *Td, buffer_t& B,
const saidx_t PA,
188 saidx_t v1, saidx_t v2, saidx_t v3) {
190 if(Td[B[PA + B[v1]]] > Td[B[PA + B[v2]]]) { SWAP(v1, v2); }
191 if(Td[B[PA + B[v2]]] > Td[B[PA + B[v3]]]) {
192 if(Td[B[PA + B[v1]]] > Td[B[PA + B[v3]]]) {
return v1; }
199 template<
typename buffer_t>
200 inline saidx_t ss_median5(
const sauchar_t *Td, buffer_t& B,
const saidx_t PA,
201 saidx_t v1, saidx_t v2, saidx_t v3, saidx_t v4, saidx_t v5) {
203 if(Td[B[PA + B[v2]]] > Td[B[PA + B[v3]]]) { SWAP(v2, v3); }
204 if(Td[B[PA + B[v4]]] > Td[B[PA + B[v5]]]) { SWAP(v4, v5); }
205 if(Td[B[PA + B[v2]]] > Td[B[PA + B[v4]]]) { SWAP(v2, v4); SWAP(v3, v5); }
206 if(Td[B[PA + B[v1]]] > Td[B[PA + B[v3]]]) { SWAP(v1, v3); }
207 if(Td[B[PA + B[v1]]] > Td[B[PA + B[v4]]]) { SWAP(v1, v4); SWAP(v3, v5); }
208 if(Td[B[PA + B[v3]]] > Td[B[PA + B[v4]]]) {
return v4; }
213 template<
typename buffer_t>
214 inline saidx_t ss_pivot(
const sauchar_t *Td, buffer_t& B,
const saidx_t PA, saidx_t first, saidx_t last) {
219 middle = first + t / 2;
223 return ss_median3(Td, B, PA, first, middle, last - 1);
226 return ss_median5(Td, B, PA, first, first + t, middle, last - 1 - t, last - 1);
230 first = ss_median3(Td, B, PA, first, first + t, first + (t << 1));
231 middle = ss_median3(Td, B, PA, middle - t, middle, middle + t);
232 last = ss_median3(Td, B, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
233 return ss_median3(Td, B, PA, first, middle, last);
240 template<
typename buffer_t>
241 inline saidx_t ss_partition(buffer_t& B,
const saidx_t PA,
242 saidx_t first, saidx_t last, saidx_t depth) {
245 for(a = first - 1, b = last;;) {
246 for(; (++a < b) && ((B[PA + B[a]] + depth) >= (B[PA + B[a] + 1] + 1));) { B[a] = ~B[a]; }
247 for(; (a < --b) && ((B[PA + B[b]] + depth) < (B[PA + B[b] + 1] + 1));) { }
248 if(b <= a) {
break; }
253 if(first < a) { B[first] = ~B[first]; }
258 template<
typename buffer_t>
259 inline void ss_mintrosort(
const sauchar_t *T, buffer_t& B,
const saidx_t PA,
260 saidx_t first, saidx_t last,
262 #define STACK_SIZE SS_MISORT_STACKSIZE 263 struct { saidx_t a, b, c; saint_t d; } stack[STACK_SIZE];
265 saidx_t a, b, c, d, e, f;
271 for(ssize = 0, limit = ilg<saidx_t>(last - first);;) {
273 if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
274 #if 1 < SS_INSERTIONSORT_THRESHOLD 275 if(1 < (last - first)) { ss_insertionsort(T, B, PA, first, last, depth); }
277 STACK_POP(first, last, depth, limit);
283 ss_heapsort(Td, B, PA, first, last - first);
286 for(a = first + 1, v = Td[B[PA + B[first]]]; a < last; ++a) {
287 if((x = Td[B[PA + B[a]]]) != v) {
288 if(1 < (a - first)) {
break; }
293 if(Td[B[PA + B[first]] - 1] < v) {
294 first = ss_partition(B, PA, first, a, depth);
296 if((a - first) <= (last - a)) {
297 if(1 < (a - first)) {
298 STACK_PUSH(a, last, depth, -1);
299 last = a, depth += 1, limit = ilg<saidx_t>(a - first);
301 first = a, limit = -1;
305 STACK_PUSH(first, a, depth + 1, ilg<saidx_t>(a - first));
306 first = a, limit = -1;
308 last = a, depth += 1, limit = ilg<saidx_t>(a - first);
315 a = ss_pivot(Td, B, PA, first, last);
316 v = Td[B[PA + B[a]]];
317 SWAP(B[first], B[a]);
320 for(b = first; (++b < last) && ((x = Td[B[PA + B[b]]]) == v);) { }
321 if(((a = b) < last) && (x < v)) {
322 for(; (++b < last) && ((x = Td[B[PA + B[b]]]) <= v);) {
323 if(x == v) { SWAP(B[b], B[a]); ++a; }
326 for(c = last; (b < --c) && ((x = Td[B[PA + B[c]]]) == v);) { }
327 if((b < (d = c)) && (x > v)) {
328 for(; (b < --c) && ((x = Td[B[PA + B[c]]]) >= v);) {
329 if(x == v) { SWAP(B[c], B[d]); --d; }
334 for(; (++b < c) && ((x = Td[B[PA + B[b]]]) <= v);) {
335 if(x == v) { SWAP(B[b], B[a]); ++a; }
337 for(; (b < --c) && ((x = Td[B[PA + B[c]]]) >= v);) {
338 if(x == v) { SWAP(B[c], B[d]); --d; }
345 if((s = a - first) > (t = b - a)) { s = t; }
346 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(B[e], B[f]); }
347 if((s = d - c) > (t = last - d - 1)) { s = t; }
348 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(B[e], B[f]); }
350 a = first + (b - a), c = last - (d - c);
351 b = (v <= Td[B[PA + B[a]] - 1]) ? a : ss_partition(B, PA, a, c, depth);
353 if((a - first) <= (last - c)) {
354 if((last - c) <= (c - b)) {
355 STACK_PUSH(b, c, depth + 1, ilg<saidx_t>(c - b));
356 STACK_PUSH(c, last, depth, limit);
358 }
else if((a - first) <= (c - b)) {
359 STACK_PUSH(c, last, depth, limit);
360 STACK_PUSH(b, c, depth + 1, ilg<saidx_t>(c - b));
363 STACK_PUSH(c, last, depth, limit);
364 STACK_PUSH(first, a, depth, limit);
365 first = b, last = c, depth += 1, limit = ilg<saidx_t>(c - b);
368 if((a - first) <= (c - b)) {
369 STACK_PUSH(b, c, depth + 1, ilg<saidx_t>(c - b));
370 STACK_PUSH(first, a, depth, limit);
372 }
else if((last - c) <= (c - b)) {
373 STACK_PUSH(first, a, depth, limit);
374 STACK_PUSH(b, c, depth + 1, ilg<saidx_t>(c - b));
377 STACK_PUSH(first, a, depth, limit);
378 STACK_PUSH(c, last, depth, limit);
379 first = b, last = c, depth += 1, limit = ilg<saidx_t>(c - b);
384 if(Td[B[PA + B[first]] - 1] < v) {
385 first = ss_partition(B, PA, first, last, depth);
386 limit = ilg<saidx_t>(last - first);
399 #if SS_BLOCKSIZE != 0 401 template<
typename buffer_t>
402 inline void ss_blockswap(buffer_t& B, saidx_t a, saidx_t b, saidx_t n) {
404 for(; 0 < n; --n, ++a, ++b) {
405 t = B[a], B[a] = B[b], B[b] = t;
409 template<
typename buffer_t>
410 inline void ss_rotate(buffer_t& B, saidx_t first, saidx_t middle, saidx_t last) {
413 l = middle - first, r = last - middle;
414 for(; (0 < l) && (0 < r);) {
415 if(l == r) { ss_blockswap(B, first, middle, l);
break; }
417 a = last - 1, b = middle - 1;
420 B[a--] = B[b], B[b--] = B[a];
424 if((r -= l + 1) <= l) {
break; }
425 a -= 1, b = middle - 1;
430 a = first, b = middle;
433 B[a++] = B[b], B[b++] = B[a];
437 if((l -= r + 1) <= r) {
break; }
449 template<
typename buffer_t>
450 inline void ss_inplacemerge(
const sauchar_t *T, buffer_t& B,
const saidx_t PA,
451 saidx_t first, saidx_t middle, saidx_t last,
460 if(B[last - 1] < 0) { x = 1; p = PA + ~B[last - 1]; }
461 else { x = 0; p = PA + B[last - 1]; }
462 for(a = first, len = middle - first, half = len >> 1, r = -1;
464 len = half, half >>= 1) {
466 q = ss_compare(T, B, PA + ((0 <= B[b]) ? B[b] : ~B[b]), B, p, depth);
469 half -= (len & 1) ^ 1;
475 if(r == 0) { B[a] = ~B[a]; }
476 ss_rotate(B, a, middle, last);
479 if(first == middle) {
break; }
482 if(x != 0) {
while(B[--last] < 0) { } }
483 if(middle == last) {
break; }
490 template<
typename buffer_t>
491 inline void ss_mergeforward(
const sauchar_t *T, buffer_t& B, saidx_t PA,
492 saidx_t first, saidx_t middle, saidx_t last,
493 saidx_t buf, saidx_t depth) {
494 saidx_t a, b, c, bufend;
498 bufend = buf + (middle - first) - 1;
499 ss_blockswap(B, buf, first, middle - first);
501 for(t = B[a = first], b = buf, c = middle;;) {
502 r = ss_compare(T, B, PA + B[b], B, PA + B[c], depth);
506 if(bufend <= b) { B[bufend] = t;
return; }
511 B[a++] = B[c], B[c++] = B[a];
513 while(b < bufend) { B[a++] = B[b], B[b++] = B[a]; }
514 B[a] = B[b], B[b] = t;
522 if(bufend <= b) { B[bufend] = t;
return; }
527 B[a++] = B[c], B[c++] = B[a];
529 while(b < bufend) { B[a++] = B[b], B[b++] = B[a]; }
530 B[a] = B[b], B[b] = t;
539 template<
typename buffer_t>
540 inline void ss_mergebackward(
const sauchar_t *T, buffer_t& B, saidx_t PA,
541 saidx_t first, saidx_t middle, saidx_t last,
542 saidx_t buf, saidx_t depth) {
544 saidx_t a, b, c, bufend;
549 bufend = buf + (last - middle) - 1;
550 ss_blockswap(B, buf, middle, last - middle);
553 if(B[bufend] < 0) { p1 = PA + ~B[bufend]; x |= 1; }
554 else { p1 = PA + B[bufend]; }
555 if(B[middle - 1] < 0) { p2 = PA + ~B[middle - 1]; x |= 2; }
556 else { p2 = PA + B[middle - 1]; }
557 for(t = B[a = last - 1], b = bufend, c = middle - 1;;) {
558 r = ss_compare(T, B, p1, B, p2, depth);
560 if(x & 1) {
do { B[a--] = B[b], B[b--] = B[a]; }
while(B[b] < 0); x ^= 1; }
562 if(b <= buf) { B[buf] = t;
break; }
564 if(B[b] < 0) { p1 = PA + ~B[b]; x |= 1; }
565 else { p1 = PA + B[b]; }
567 if(x & 2) {
do { B[a--] = B[c], B[c--] = B[a]; }
while(B[c] < 0); x ^= 2; }
568 B[a--] = B[c], B[c--] = B[a];
570 while(buf < b) { B[a--] = B[b], B[b--] = B[a]; }
571 B[a] = B[b], B[b] = t;
574 if(B[c] < 0) { p2 = PA + ~B[c]; x |= 2; }
575 else { p2 = PA + B[c]; }
577 if(x & 1) {
do { B[a--] = B[b], B[b--] = B[a]; }
while(B[b] < 0); x ^= 1; }
579 if(b <= buf) { B[buf] = t;
break; }
581 if(x & 2) {
do { B[a--] = B[c], B[c--] = B[a]; }
while(B[c] < 0); x ^= 2; }
582 B[a--] = B[c], B[c--] = B[a];
584 while(buf < b) { B[a--] = B[b], B[b--] = B[a]; }
585 B[a] = B[b], B[b] = t;
588 if(B[b] < 0) { p1 = PA + ~B[b]; x |= 1; }
589 else { p1 = PA + B[b]; }
590 if(B[c] < 0) { p2 = PA + ~B[c]; x |= 2; }
591 else { p2 = PA + B[c]; }
597 template<
typename buffer_t>
598 inline void ss_swapmerge(
const sauchar_t *T, buffer_t& B, saidx_t PA,
599 saidx_t first, saidx_t middle, saidx_t last,
600 saidx_t buf, saidx_t bufsize, saidx_t depth) {
601 #define STACK_SIZE SS_SMERGE_STACKSIZE 602 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) 603 #define MERGE_CHECK(a, b, c)\ 606 (((c) & 2) && (ss_compare(T, B, PA + GETIDX(B[(a) - 1]), B, PA + B[a], depth) == 0))) {\ 609 if(((c) & 4) && ((ss_compare(T, B, PA + GETIDX(B[(b) - 1]), B, PA + B[b], depth) == 0))) {\ 613 struct { saidx_t a, b, c; saint_t d; } stack[STACK_SIZE];
614 saidx_t l, r, lm, rm;
615 saidx_t m,
len, half;
619 for(check = 0, ssize = 0;;) {
620 if((last - middle) <= bufsize) {
621 if((first < middle) && (middle < last)) {
622 ss_mergebackward(T, B, PA, first, middle, last, buf, depth);
624 MERGE_CHECK(first, last, check);
625 STACK_POP(first, middle, last, check);
629 if((middle - first) <= bufsize) {
631 ss_mergeforward(T, B, PA, first, middle, last, buf, depth);
633 MERGE_CHECK(first, last, check);
634 STACK_POP(first, middle, last, check);
638 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
640 len = half, half >>= 1) {
641 if(ss_compare(T, B, PA + GETIDX(B[middle + m + half]),
642 B, PA + GETIDX(B[middle - m - half - 1]), depth) < 0) {
644 half -= (len & 1) ^ 1;
649 lm = middle - m, rm = middle + m;
650 ss_blockswap(B, lm, middle, m);
651 l = r = middle, next = 0;
655 if(first < lm) {
for(; B[--l] < 0;) { } next |= 4; }
657 }
else if(first < lm) {
658 for(; B[r] < 0; ++r) { }
663 if((l - first) <= (last - r)) {
664 STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
665 middle = lm, last = l, check = (check & 3) | (next & 4);
667 if((next & 2) && (r == middle)) { next ^= 6; }
668 STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
669 first = r, middle = rm, check = (next & 3) | (check & 4);
672 if(ss_compare(T, B, PA + GETIDX(B[middle - 1]), B, PA + B[middle], depth) == 0) {
673 B[middle] = ~B[middle];
675 MERGE_CHECK(first, last, check);
676 STACK_POP(first, middle, last, check);
690 template<
typename buffer_t>
691 inline void sssort(
const sauchar_t *T, buffer_t& B, saidx_t PA,
692 saidx_t first, saidx_t last,
693 saidx_t buf, saidx_t bufsize,
694 saidx_t depth, saidx_t n, saint_t lastsuffix) {
696 #if SS_BLOCKSIZE != 0 697 saidx_t b, middle, curbuf;
698 saidx_t j, k, curbufsize, limit;
702 if(lastsuffix != 0) { ++first; }
704 #if SS_BLOCKSIZE == 0 705 ss_mintrosort(T, B, PA, first, last, depth);
707 if((bufsize < SS_BLOCKSIZE) &&
708 (bufsize < (last - first)) &&
709 (bufsize < (limit = ss_isqrt(last - first)))) {
710 if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
711 buf = middle = last - limit, bufsize = limit;
713 middle = last, limit = 0;
715 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
716 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 717 ss_mintrosort(T, B, PA, a, a + SS_BLOCKSIZE, depth);
718 #elif 1 < SS_BLOCKSIZE 719 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
721 curbufsize = last - (a + SS_BLOCKSIZE);
722 curbuf = a + SS_BLOCKSIZE;
723 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
724 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
725 ss_swapmerge(T, B, PA, b - k, b, b + k, curbuf, curbufsize, depth);
728 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 729 ss_mintrosort(T, B, PA, a, middle, depth);
730 #elif 1 < SS_BLOCKSIZE 731 ss_insertionsort(T, PA, a, middle, depth);
733 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
735 ss_swapmerge(T, B, PA, a - k, a, middle, buf, bufsize, depth);
740 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 741 ss_mintrosort(T, B, PA, middle, last, depth);
742 #elif 1 < SS_BLOCKSIZE 743 ss_insertionsort(T, B, PA, middle, last, depth);
745 ss_inplacemerge(T, B, PA, first, middle, last, depth);
749 if(lastsuffix != 0) {
751 saidx_t PAi[2]; PAi[0] = B[PA+B[first - 1]], PAi[1] = n - 2;
752 for(a = first, i = B[first - 1];
753 (a < last) && ((B[a] < 0) || (0 < ss_compare(T, PAi, 0, B, PA + B[a], depth)));
Contains the text compression and encoding framework.