36 namespace libdivsufsort {
41 template<
typename buffer_t>
42 inline void tr_insertionsort(buffer_t& B,
const saidx_t ISAd, saidx_t first, saidx_t last) {
46 for(a = first + 1; a < last; ++a) {
47 for(t = B[a], b = a - 1; 0 > (r = B[ISAd + t] - B[ISAd + B[b]]);) {
48 do { B[b + 1] = B[b]; }
while((first <= --b) && (B[b] < 0));
49 if(b < first) {
break; }
51 if(r == 0) { B[b] = ~B[b]; }
59 template<
typename buffer_t>
60 inline void tr_fixdown(buffer_t& B,
const saidx_t ISAd, saidx_t
SA, saidx_t i, saidx_t size) {
65 for(v = B[SA + i], c = B[ISAd + v]; (j = 2 * i + 1) < size; B[SA + i] = B[SA + k], i = k) {
67 d = B[ISAd + B[SA + k]];
68 if(d < (e = B[ISAd + B[SA + j]])) { k = j; d = e; }
75 template<
typename buffer_t>
76 inline void tr_heapsort(buffer_t& B,
const saidx_t ISAd, saidx_t SA, saidx_t size) {
83 if(B[ISAd + B[SA + m / 2]] < B[ISAd + B[SA + m]]) { SWAP(B[SA + m], B[SA + m / 2]); }
86 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(B, ISAd, SA, i, m); }
87 if((size % 2) == 0) { SWAP(B[SA + 0], B[SA + m]); tr_fixdown(B, ISAd, SA, 0, m); }
88 for(i = m - 1; 0 < i; --i) {
89 t = B[SA + 0], B[SA + 0] = B[SA + i];
90 tr_fixdown(B, ISAd, SA, 0, i);
99 template<
typename buffer_t>
100 inline saidx_t tr_median3(buffer_t& B,
const saidx_t ISAd, saidx_t v1, saidx_t v2, saidx_t v3) {
102 if(B[ISAd + B[v1]] > B[ISAd + B[v2]]) { SWAP(v1, v2); }
103 if(B[ISAd + B[v2]] > B[ISAd + B[v3]]) {
104 if(B[ISAd + B[v1]] > B[ISAd + B[v3]]) {
return v1; }
111 template<
typename buffer_t>
112 inline saidx_t tr_median5(buffer_t& B,
const saidx_t ISAd,
113 saidx_t v1, saidx_t v2, saidx_t v3, saidx_t v4, saidx_t v5) {
115 if(B[ISAd + B[v2]] > B[ISAd + B[v3]]) { SWAP(v2, v3); }
116 if(B[ISAd + B[v4]] > B[ISAd + B[v5]]) { SWAP(v4, v5); }
117 if(B[ISAd + B[v2]] > B[ISAd + B[v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
118 if(B[ISAd + B[v1]] > B[ISAd + B[v3]]) { SWAP(v1, v3); }
119 if(B[ISAd + B[v1]] > B[ISAd + B[v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
120 if(B[ISAd + B[v3]] > B[ISAd + B[v4]]) {
return v4; }
125 template<
typename buffer_t>
126 inline saidx_t tr_pivot(buffer_t& B,
const saidx_t ISAd, saidx_t first, saidx_t last) {
131 middle = first + t / 2;
135 return tr_median3(B, ISAd, first, middle, last - 1);
138 return tr_median5(B, ISAd, first, first + t, middle, last - 1 - t, last - 1);
142 first = tr_median3(B, ISAd, first, first + t, first + (t << 1));
143 middle = tr_median3(B, ISAd, middle - t, middle, middle + t);
144 last = tr_median3(B, ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
145 return tr_median3(B, ISAd, first, middle, last);
151 typedef struct _trbudget_t trbudget_t;
159 inline void trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
160 budget->chance = chance;
161 budget->remain = budget->incval = incval;
164 inline saint_t trbudget_check(trbudget_t *budget, saidx_t size) {
165 if(size <= budget->remain) { budget->remain -= size;
return 1; }
166 if(budget->chance == 0) { budget->count += size;
return 0; }
167 budget->remain += budget->incval - size;
175 template<
typename buffer_t>
176 inline void tr_partition(buffer_t& B,
const saidx_t ISAd,
177 saidx_t first, saidx_t middle, saidx_t last,
178 saidx_t *pa, saidx_t *pb, saidx_t v) {
179 saidx_t a, b, c, d, e, f;
183 for(b = middle - 1; (++b < last) && ((x = B[ISAd + B[b]]) == v);) { }
184 if(((a = b) < last) && (x < v)) {
185 for(; (++b < last) && ((x = B[ISAd + B[b]]) <= v);) {
186 if(x == v) { SWAP(B[b], B[a]); ++a; }
189 for(c = last; (b < --c) && ((x = B[ISAd + B[c]]) == v);) { }
190 if((b < (d = c)) && (x > v)) {
191 for(; (b < --c) && ((x = B[ISAd + B[c]]) >= v);) {
192 if(x == v) { SWAP(B[c], B[d]); --d; }
197 for(; (++b < c) && ((x = B[ISAd + B[b]]) <= v);) {
198 if(x == v) { SWAP(B[b], B[a]); ++a; }
200 for(; (b < --c) && ((x = B[ISAd + B[c]]) >= v);) {
201 if(x == v) { SWAP(B[c], B[d]); --d; }
207 if((s = a - first) > (t = b - a)) { s = t; }
208 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(B[e], B[f]); }
209 if((s = d - c) > (t = last - d - 1)) { s = t; }
210 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(B[e], B[f]); }
211 first += (b - a), last -= (d - c);
213 *pa = first, *pb = last;
216 template<
typename buffer_t>
217 inline void tr_copy(buffer_t& B, saidx_t
ISA,
const saidx_t SA,
218 saidx_t first, saidx_t a, saidx_t b, saidx_t last,
226 for(c = first, d = a - 1; c <= d; ++c) {
227 if((0 <= (s = B[c] - depth)) && (B[ISA + s] == v)) {
232 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
233 if((0 <= (s = B[c] - depth)) && (B[ISA + s] == v)) {
240 template<
typename buffer_t>
241 inline void tr_partialcopy(buffer_t& B, saidx_t ISA,
const saidx_t SA,
242 saidx_t first, saidx_t a, saidx_t b, saidx_t last,
246 saidx_t rank, lastrank, newrank = -1;
250 for(c = first, d = a - 1; c <= d; ++c) {
251 if((0 <= (s = B[c] - depth)) && (B[ISA + s] == v)) {
253 rank = B[ISA + s + depth];
254 if(lastrank != rank) { lastrank = rank; newrank = d -
SA; }
255 B[ISA + s] = newrank;
260 for(e = d; first <= e; --e) {
261 rank = B[ISA + B[e]];
262 if(lastrank != rank) { lastrank = rank; newrank = e -
SA; }
263 if(newrank != rank) { B[ISA + B[e]] = newrank; }
267 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
268 if((0 <= (s = B[c] - depth)) && (B[ISA + s] == v)) {
270 rank = B[ISA + s + depth];
271 if(lastrank != rank) { lastrank = rank; newrank = d -
SA; }
272 B[ISA + s] = newrank;
277 template<
typename buffer_t>
278 inline void tr_introsort(buffer_t& B, saidx_t ISA, saidx_t ISAd,
279 saidx_t SA, saidx_t first, saidx_t last,
280 trbudget_t *budget) {
281 #define STACK_SIZE TR_STACKSIZE 282 struct { saidx_t a, b, c; saint_t d, e; }stack[STACK_SIZE];
286 saidx_t incr = ISAd -
ISA;
288 saint_t ssize, trlink = -1;
290 for(ssize = 0, limit = ilg<saidx_t>(last - first);;) {
295 tr_partition(B, ISAd - incr, first, first, last, &a, &b, last - SA - 1);
299 for(c = first, v = a - SA - 1; c < a; ++c) { B[ISA + B[c]] = v; }
302 for(c = a, v = b - SA - 1; c < b; ++c) { B[ISA + B[c]] = v; }
307 STACK_PUSH5(-1, a, b, 0, 0);
308 STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
311 if((a - first) <= (last - b)) {
312 if(1 < (a - first)) {
313 STACK_PUSH5(ISAd, b, last, ilg<saidx_t>(last - b), trlink);
314 last = a, limit = ilg<saidx_t>(a - first);
315 }
else if(1 < (last - b)) {
316 first = b, limit = ilg<saidx_t>(last - b);
318 STACK_POP5(ISAd, first, last, limit, trlink);
322 STACK_PUSH5(ISAd, first, a, ilg<saidx_t>(a - first), trlink);
323 first = b, limit = ilg<saidx_t>(last - b);
324 }
else if(1 < (a - first)) {
325 last = a, limit = ilg<saidx_t>(a - first);
327 STACK_POP5(ISAd, first, last, limit, trlink);
330 }
else if(limit == -2) {
332 a = stack[--ssize].b, b = stack[ssize].c;
333 if(stack[ssize].d == 0) {
334 tr_copy(B, ISA, SA, first, a, b, last, ISAd - ISA);
336 if(0 <= trlink) { stack[trlink].d = -1; }
337 tr_partialcopy(B, ISA, SA, first, a, b, last, ISAd - ISA);
339 STACK_POP5(ISAd, first, last, limit, trlink);
344 do { B[ISA + B[a]] = a -
SA; }
while((++a < last) && (0 <= B[a]));
348 a = first;
do { B[a] = ~B[a]; }
while(B[++a] < 0);
349 next = (B[ISA + B[a]] != B[ISAd + B[a]]) ? ilg<saidx_t>(a - first + 1) : -1;
350 if(++a < last) {
for(b = first, v = a - SA - 1; b < a; ++b) { B[ISA + B[b]] = v; } }
353 if(trbudget_check(budget, a - first)) {
354 if((a - first) <= (last - a)) {
355 STACK_PUSH5(ISAd, a, last, -3, trlink);
356 ISAd += incr, last = a, limit = next;
359 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
360 first = a, limit = -3;
362 ISAd += incr, last = a, limit = next;
366 if(0 <= trlink) { stack[trlink].d = -1; }
368 first = a, limit = -3;
370 STACK_POP5(ISAd, first, last, limit, trlink);
374 STACK_POP5(ISAd, first, last, limit, trlink);
380 if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
381 tr_insertionsort(B, ISAd, first, last);
387 tr_heapsort(B, ISAd, first, last - first);
388 for(a = last - 1; first < a; a = b) {
389 for(x = B[ISAd + B[a]], b = a - 1; (first <= b) && (B[ISAd + B[b]] == x); --b) { B[b] = ~B[b]; }
396 a = tr_pivot(B, ISAd, first, last);
397 SWAP(B[first], B[a]);
398 v = B[ISAd + B[first]];
401 tr_partition(B, ISAd, first, first + 1, last, &a, &b, v);
402 if((last - first) != (b - a)) {
403 next = (B[ISA + B[a]] != v) ? ilg<saidx_t>(b - a) : -1;
406 for(c = first, v = a - SA - 1; c < a; ++c) { B[ISA + B[c]] = v; }
407 if(b < last) {
for(c = a, v = b - SA - 1; c < b; ++c) { B[ISA + B[c]] = v; } }
410 if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
411 if((a - first) <= (last - b)) {
412 if((last - b) <= (b - a)) {
413 if(1 < (a - first)) {
414 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
415 STACK_PUSH5(ISAd, b, last, limit, trlink);
417 }
else if(1 < (last - b)) {
418 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
421 ISAd += incr, first = a, last = b, limit = next;
423 }
else if((a - first) <= (b - a)) {
424 if(1 < (a - first)) {
425 STACK_PUSH5(ISAd, b, last, limit, trlink);
426 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
429 STACK_PUSH5(ISAd, b, last, limit, trlink);
430 ISAd += incr, first = a, last = b, limit = next;
433 STACK_PUSH5(ISAd, b, last, limit, trlink);
434 STACK_PUSH5(ISAd, first, a, limit, trlink);
435 ISAd += incr, first = a, last = b, limit = next;
438 if((a - first) <= (b - a)) {
440 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
441 STACK_PUSH5(ISAd, first, a, limit, trlink);
443 }
else if(1 < (a - first)) {
444 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
447 ISAd += incr, first = a, last = b, limit = next;
449 }
else if((last - b) <= (b - a)) {
451 STACK_PUSH5(ISAd, first, a, limit, trlink);
452 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
455 STACK_PUSH5(ISAd, first, a, limit, trlink);
456 ISAd += incr, first = a, last = b, limit = next;
459 STACK_PUSH5(ISAd, first, a, limit, trlink);
460 STACK_PUSH5(ISAd, b, last, limit, trlink);
461 ISAd += incr, first = a, last = b, limit = next;
465 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
466 if((a - first) <= (last - b)) {
467 if(1 < (a - first)) {
468 STACK_PUSH5(ISAd, b, last, limit, trlink);
470 }
else if(1 < (last - b)) {
473 STACK_POP5(ISAd, first, last, limit, trlink);
477 STACK_PUSH5(ISAd, first, a, limit, trlink);
479 }
else if(1 < (a - first)) {
482 STACK_POP5(ISAd, first, last, limit, trlink);
487 if(trbudget_check(budget, last - first)) {
488 limit = ilg<saidx_t>(last - first), ISAd += incr;
490 if(0 <= trlink) { stack[trlink].d = -1; }
491 STACK_POP5(ISAd, first, last, limit, trlink);
505 template<
typename buffer_t>
506 inline void trsort(buffer_t& B, saidx_t ISA, saidx_t SA, saidx_t n, saidx_t depth) {
510 saidx_t t, skip, unsorted;
512 trbudget_init(&budget, ilg<saidx_t>(n) * 2 / 3, n);
513 for(ISAd = ISA + depth; -n < B[
SA]; ISAd += ISAd -
ISA) {
518 if((t = B[first]) < 0) { first -= t; skip += t; }
520 if(skip != 0) { B[first + skip] = skip; skip = 0; }
521 last = SA + B[ISA + t] + 1;
522 if(1 < (last - first)) {
524 tr_introsort(B, ISA, ISAd, SA, first, last, &budget);
525 if(budget.count != 0) { unsorted += budget.count; }
526 else { skip = first - last; }
527 }
else if((last - first) == 1) {
532 }
while(first < (SA + n));
533 if(skip != 0) { B[first + skip] = skip; }
534 if(unsorted == 0) {
break; }
Contains the text compression and encoding framework.