tudocomp
– The TU Dortmund Compression Framework
divsufsort_ssort.hpp
Go to the documentation of this file.
1 /*
2  * This file integrates customized parts of divsufsort into tudocomp.
3  * divsufsort is licensed under the MIT License, which follows.
4  *
5  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person
8  * obtaining a copy of this software and associated documentation
9  * files (the "Software"), to deal in the Software without
10  * restriction, including without limitation the rights to use,
11  * copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following
14  * conditions:
15  *
16  * The above copyright notice and this permission notice shall be
17  * included in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26  * OTHER DEALINGS IN THE SOFTWARE.
27  */
28 
29 #pragma once
30 
33 
35 namespace tdc {
36 namespace libdivsufsort {
37 
38 // all below is adapted from ssort.c
39 
40 #if SS_BLOCKSIZE != 0
41 
42 const saint_t sqq_table[256] = {
43  0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
44  64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
45  90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
46 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
47 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
48 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
49 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
50 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
51 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
52 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
53 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
54 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
55 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
56 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
57 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
58 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
59 };
60 
61 inline
62 saidx_t
63 ss_isqrt(saidx_t x) {
64  saidx_t y, e;
65 
66  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
67  e = (x & 0xffff0000) ?
68  ((x & 0xff000000) ?
69  24 + lg_table[(x >> 24) & 0xff] :
70  16 + lg_table[(x >> 16) & 0xff]) :
71  ((x & 0x0000ff00) ?
72  8 + lg_table[(x >> 8) & 0xff] :
73  0 + lg_table[(x >> 0) & 0xff]);
74 
75  if(e >= 16) {
76  y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
77  if(e >= 24) { y = (y + 1 + x / y) >> 1; }
78  y = (y + 1 + x / y) >> 1;
79  } else if(e >= 8) {
80  y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
81  } else {
82  return sqq_table[x] >> 4;
83  }
84 
85  return (x < (y * y)) ? y - 1 : y;
86 }
87 
88 #endif /* SS_BLOCKSIZE != 0 */
89 
90 
91 /*---------------------------------------------------------------------------*/
92 
93 /* Compares two suffixes. */
94 template<typename buffer_t1, typename buffer_t2>
95 inline saint_t ss_compare(const sauchar_t *T,
96  buffer_t1& B1, saidx_t p1,
97  buffer_t2& B2, saidx_t p2,
98  saidx_t depth) {
99  const sauchar_t *U1, *U2, *U1n, *U2n;
100 
101  for(U1 = T + depth + B1[p1],
102  U2 = T + depth + B2[p2],
103  U1n = T + B1[p1 + 1] + 2,
104  U2n = T + B2[p2 + 1] + 2;
105  (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
106  ++U1, ++U2) {
107  }
108 
109  return U1 < U1n ?
110  (U2 < U2n ? *U1 - *U2 : 1) :
111  (U2 < U2n ? -1 : 0);
112 }
113 
114 
115 /*---------------------------------------------------------------------------*/
116 
117 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
118 
119 /* Insertionsort for small size groups */
120 /* Insertionsort for small size groups */
121 template<typename buffer_t>
122 inline void ss_insertionsort(const sauchar_t *T, buffer_t& B, const saidx_t PA,
123  saidx_t first, saidx_t last, saidx_t depth) {
124  saidx_t i, j;
125  saidx_t t;
126  saint_t r;
127 
128  for(i = last - 2; first <= i; --i) {
129  for(t = B[i], j = i + 1; 0 < (r = ss_compare(T, B, PA + t, B, PA + B[j], depth));) {
130  do { B[j - 1] = B[j]; } while((++j < last) && (B[j] < 0));
131  if(last <= j) { break; }
132  }
133  if(r == 0) { B[j] = ~B[j]; }
134  B[j - 1] = t;
135  }
136 }
137 
138 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
139 
140 
141 /*---------------------------------------------------------------------------*/
142 
143 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
144 
145 template<typename buffer_t>
146 inline void ss_fixdown(const sauchar_t *Td, buffer_t& B, const saidx_t PA,
147  saidx_t SA, saidx_t i, saidx_t size) {
148  saidx_t j, k;
149  saidx_t v;
150  saint_t c, d, e;
151 
152  for(v = B[SA + i], c = Td[B[PA + v]]; (j = 2 * i + 1) < size; B[SA + i] = B[SA + k], i = k) {
153  k = j++;
154  d = Td[B[PA + B[SA + k]]];
155  if(d < (e = Td[B[PA + B[SA + j]]])) { k = j; d = e; }
156  if(d <= c) { break; }
157  }
158  B[SA + i] = v;
159 }
160 
161 /* Simple top-down heapsort. */
162 template<typename buffer_t>
163 inline void ss_heapsort(const sauchar_t *Td, buffer_t& B, const saidx_t PA, saidx_t SA, saidx_t size) {
164  saidx_t i, m;
165  saidx_t t;
166 
167  m = size;
168  if((size % 2) == 0) {
169  m--;
170  if(Td[B[PA + B[SA + m / 2]]] < Td[B[PA + B[SA + m]]]) { SWAP(B[SA + m], B[SA + m / 2]); }
171  }
172 
173  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, B, PA, SA, i, m); }
174  if((size % 2) == 0) { SWAP(B[SA + 0], B[SA + m]); ss_fixdown(Td, B, PA, SA, 0, m); }
175  for(i = m - 1; 0 < i; --i) {
176  t = B[SA + 0], B[SA + 0] = B[SA + i];
177  ss_fixdown(Td, B, PA, SA, 0, i);
178  B[SA + i] = t;
179  }
180 }
181 
182 
183 /*---------------------------------------------------------------------------*/
184 
185 /* Returns the median of three elements. */
186 template<typename buffer_t>
187 inline saidx_t ss_median3(const sauchar_t *Td, buffer_t& B, const saidx_t PA,
188  saidx_t v1, saidx_t v2, saidx_t v3) {
189  saidx_t t;
190  if(Td[B[PA + B[v1]]] > Td[B[PA + B[v2]]]) { SWAP(v1, v2); }
191  if(Td[B[PA + B[v2]]] > Td[B[PA + B[v3]]]) {
192  if(Td[B[PA + B[v1]]] > Td[B[PA + B[v3]]]) { return v1; }
193  else { return v3; }
194  }
195  return v2;
196 }
197 
198 /* Returns the median of five elements. */
199 template<typename buffer_t>
200 inline saidx_t ss_median5(const sauchar_t *Td, buffer_t& B, const saidx_t PA,
201  saidx_t v1, saidx_t v2, saidx_t v3, saidx_t v4, saidx_t v5) {
202  saidx_t t;
203  if(Td[B[PA + B[v2]]] > Td[B[PA + B[v3]]]) { SWAP(v2, v3); }
204  if(Td[B[PA + B[v4]]] > Td[B[PA + B[v5]]]) { SWAP(v4, v5); }
205  if(Td[B[PA + B[v2]]] > Td[B[PA + B[v4]]]) { SWAP(v2, v4); SWAP(v3, v5); }
206  if(Td[B[PA + B[v1]]] > Td[B[PA + B[v3]]]) { SWAP(v1, v3); }
207  if(Td[B[PA + B[v1]]] > Td[B[PA + B[v4]]]) { SWAP(v1, v4); SWAP(v3, v5); }
208  if(Td[B[PA + B[v3]]] > Td[B[PA + B[v4]]]) { return v4; }
209  return v3;
210 }
211 
212 /* Returns the pivot element. */
213 template<typename buffer_t>
214 inline saidx_t ss_pivot(const sauchar_t *Td, buffer_t& B, const saidx_t PA, saidx_t first, saidx_t last) {
215  saidx_t middle;
216  saidx_t t;
217 
218  t = last - first;
219  middle = first + t / 2;
220 
221  if(t <= 512) {
222  if(t <= 32) {
223  return ss_median3(Td, B, PA, first, middle, last - 1);
224  } else {
225  t >>= 2;
226  return ss_median5(Td, B, PA, first, first + t, middle, last - 1 - t, last - 1);
227  }
228  }
229  t >>= 3;
230  first = ss_median3(Td, B, PA, first, first + t, first + (t << 1));
231  middle = ss_median3(Td, B, PA, middle - t, middle, middle + t);
232  last = ss_median3(Td, B, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
233  return ss_median3(Td, B, PA, first, middle, last);
234 }
235 
236 
237 /*---------------------------------------------------------------------------*/
238 
239 /* Binary partition for substrings. */
240 template<typename buffer_t>
241 inline saidx_t ss_partition(buffer_t& B, const saidx_t PA,
242  saidx_t first, saidx_t last, saidx_t depth) {
243  saidx_t a, b;
244  saidx_t t;
245  for(a = first - 1, b = last;;) {
246  for(; (++a < b) && ((B[PA + B[a]] + depth) >= (B[PA + B[a] + 1] + 1));) { B[a] = ~B[a]; }
247  for(; (a < --b) && ((B[PA + B[b]] + depth) < (B[PA + B[b] + 1] + 1));) { }
248  if(b <= a) { break; }
249  t = ~B[b];
250  B[b] = B[a];
251  B[a] = t;
252  }
253  if(first < a) { B[first] = ~B[first]; }
254  return a;
255 }
256 
257 /* Multikey introsort for medium size groups. */
258 template<typename buffer_t>
259 inline void ss_mintrosort(const sauchar_t *T, buffer_t& B, const saidx_t PA,
260  saidx_t first, saidx_t last,
261  saidx_t depth) {
262 #define STACK_SIZE SS_MISORT_STACKSIZE
263  struct { saidx_t a, b, c; saint_t d; } stack[STACK_SIZE];
264  const sauchar_t *Td;
265  saidx_t a, b, c, d, e, f;
266  saidx_t s, t;
267  saint_t ssize;
268  saint_t limit;
269  saint_t v, x = 0;
270 
271  for(ssize = 0, limit = ilg<saidx_t>(last - first);;) {
272 
273  if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
274 #if 1 < SS_INSERTIONSORT_THRESHOLD
275  if(1 < (last - first)) { ss_insertionsort(T, B, PA, first, last, depth); }
276 #endif
277  STACK_POP(first, last, depth, limit);
278  continue;
279  }
280 
281  Td = T + depth;
282  if(limit-- == 0) {
283  ss_heapsort(Td, B, PA, first, last - first);
284  }
285  if(limit < 0) {
286  for(a = first + 1, v = Td[B[PA + B[first]]]; a < last; ++a) {
287  if((x = Td[B[PA + B[a]]]) != v) {
288  if(1 < (a - first)) { break; }
289  v = x;
290  first = a;
291  }
292  }
293  if(Td[B[PA + B[first]] - 1] < v) {
294  first = ss_partition(B, PA, first, a, depth);
295  }
296  if((a - first) <= (last - a)) {
297  if(1 < (a - first)) {
298  STACK_PUSH(a, last, depth, -1);
299  last = a, depth += 1, limit = ilg<saidx_t>(a - first);
300  } else {
301  first = a, limit = -1;
302  }
303  } else {
304  if(1 < (last - a)) {
305  STACK_PUSH(first, a, depth + 1, ilg<saidx_t>(a - first));
306  first = a, limit = -1;
307  } else {
308  last = a, depth += 1, limit = ilg<saidx_t>(a - first);
309  }
310  }
311  continue;
312  }
313 
314  /* choose pivot */
315  a = ss_pivot(Td, B, PA, first, last);
316  v = Td[B[PA + B[a]]];
317  SWAP(B[first], B[a]);
318 
319  /* partition */
320  for(b = first; (++b < last) && ((x = Td[B[PA + B[b]]]) == v);) { }
321  if(((a = b) < last) && (x < v)) {
322  for(; (++b < last) && ((x = Td[B[PA + B[b]]]) <= v);) {
323  if(x == v) { SWAP(B[b], B[a]); ++a; }
324  }
325  }
326  for(c = last; (b < --c) && ((x = Td[B[PA + B[c]]]) == v);) { }
327  if((b < (d = c)) && (x > v)) {
328  for(; (b < --c) && ((x = Td[B[PA + B[c]]]) >= v);) {
329  if(x == v) { SWAP(B[c], B[d]); --d; }
330  }
331  }
332  for(; b < c;) {
333  SWAP(B[b], B[c]);
334  for(; (++b < c) && ((x = Td[B[PA + B[b]]]) <= v);) {
335  if(x == v) { SWAP(B[b], B[a]); ++a; }
336  }
337  for(; (b < --c) && ((x = Td[B[PA + B[c]]]) >= v);) {
338  if(x == v) { SWAP(B[c], B[d]); --d; }
339  }
340  }
341 
342  if(a <= d) {
343  c = b - 1;
344 
345  if((s = a - first) > (t = b - a)) { s = t; }
346  for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(B[e], B[f]); }
347  if((s = d - c) > (t = last - d - 1)) { s = t; }
348  for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(B[e], B[f]); }
349 
350  a = first + (b - a), c = last - (d - c);
351  b = (v <= Td[B[PA + B[a]] - 1]) ? a : ss_partition(B, PA, a, c, depth);
352 
353  if((a - first) <= (last - c)) {
354  if((last - c) <= (c - b)) {
355  STACK_PUSH(b, c, depth + 1, ilg<saidx_t>(c - b));
356  STACK_PUSH(c, last, depth, limit);
357  last = a;
358  } else if((a - first) <= (c - b)) {
359  STACK_PUSH(c, last, depth, limit);
360  STACK_PUSH(b, c, depth + 1, ilg<saidx_t>(c - b));
361  last = a;
362  } else {
363  STACK_PUSH(c, last, depth, limit);
364  STACK_PUSH(first, a, depth, limit);
365  first = b, last = c, depth += 1, limit = ilg<saidx_t>(c - b);
366  }
367  } else {
368  if((a - first) <= (c - b)) {
369  STACK_PUSH(b, c, depth + 1, ilg<saidx_t>(c - b));
370  STACK_PUSH(first, a, depth, limit);
371  first = c;
372  } else if((last - c) <= (c - b)) {
373  STACK_PUSH(first, a, depth, limit);
374  STACK_PUSH(b, c, depth + 1, ilg<saidx_t>(c - b));
375  first = c;
376  } else {
377  STACK_PUSH(first, a, depth, limit);
378  STACK_PUSH(c, last, depth, limit);
379  first = b, last = c, depth += 1, limit = ilg<saidx_t>(c - b);
380  }
381  }
382  } else {
383  limit += 1;
384  if(Td[B[PA + B[first]] - 1] < v) {
385  first = ss_partition(B, PA, first, last, depth);
386  limit = ilg<saidx_t>(last - first);
387  }
388  depth += 1;
389  }
390  }
391 #undef STACK_SIZE
392 }
393 
394 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
395 
396 
397 /*---------------------------------------------------------------------------*/
398 
399 #if SS_BLOCKSIZE != 0
400 
401 template<typename buffer_t>
402 inline void ss_blockswap(buffer_t& B, saidx_t a, saidx_t b, saidx_t n) {
403  saidx_t t;
404  for(; 0 < n; --n, ++a, ++b) {
405  t = B[a], B[a] = B[b], B[b] = t;
406  }
407 }
408 
409 template<typename buffer_t>
410 inline void ss_rotate(buffer_t& B, saidx_t first, saidx_t middle, saidx_t last) {
411  saidx_t a, b;
412  saidx_t t, l, r;
413  l = middle - first, r = last - middle;
414  for(; (0 < l) && (0 < r);) {
415  if(l == r) { ss_blockswap(B, first, middle, l); break; }
416  if(l < r) {
417  a = last - 1, b = middle - 1;
418  t = B[a];
419  do {
420  B[a--] = B[b], B[b--] = B[a];
421  if(b < first) {
422  B[a] = t;
423  last = a;
424  if((r -= l + 1) <= l) { break; }
425  a -= 1, b = middle - 1;
426  t = B[a];
427  }
428  } while(1);
429  } else {
430  a = first, b = middle;
431  t = B[a];
432  do {
433  B[a++] = B[b], B[b++] = B[a];
434  if(last <= b) {
435  B[a] = t;
436  first = a + 1;
437  if((l -= r + 1) <= r) { break; }
438  a += 1, b = middle;
439  t = B[a];
440  }
441  } while(1);
442  }
443  }
444 }
445 
446 
447 /*---------------------------------------------------------------------------*/
448 
449 template<typename buffer_t>
450 inline void ss_inplacemerge(const sauchar_t *T, buffer_t& B, const saidx_t PA,
451  saidx_t first, saidx_t middle, saidx_t last,
452  saidx_t depth) {
453  saidx_t p;
454  saidx_t a, b;
455  saidx_t len, half;
456  saint_t q, r;
457  saint_t x;
458 
459  for(;;) {
460  if(B[last - 1] < 0) { x = 1; p = PA + ~B[last - 1]; }
461  else { x = 0; p = PA + B[last - 1]; }
462  for(a = first, len = middle - first, half = len >> 1, r = -1;
463  0 < len;
464  len = half, half >>= 1) {
465  b = a + half;
466  q = ss_compare(T, B, PA + ((0 <= B[b]) ? B[b] : ~B[b]), B, p, depth);
467  if(q < 0) {
468  a = b + 1;
469  half -= (len & 1) ^ 1;
470  } else {
471  r = q;
472  }
473  }
474  if(a < middle) {
475  if(r == 0) { B[a] = ~B[a]; }
476  ss_rotate(B, a, middle, last);
477  last -= middle - a;
478  middle = a;
479  if(first == middle) { break; }
480  }
481  --last;
482  if(x != 0) { while(B[--last] < 0) { } }
483  if(middle == last) { break; }
484  }
485 }
486 
487 /*---------------------------------------------------------------------------*/
488 
489 /* Merge-forward with internal buffer. */
490 template<typename buffer_t>
491 inline void ss_mergeforward(const sauchar_t *T, buffer_t& B, saidx_t PA,
492  saidx_t first, saidx_t middle, saidx_t last,
493  saidx_t buf, saidx_t depth) {
494  saidx_t a, b, c, bufend;
495  saidx_t t;
496  saint_t r;
497 
498  bufend = buf + (middle - first) - 1;
499  ss_blockswap(B, buf, first, middle - first);
500 
501  for(t = B[a = first], b = buf, c = middle;;) {
502  r = ss_compare(T, B, PA + B[b], B, PA + B[c], depth);
503  if(r < 0) {
504  do {
505  B[a++] = B[b];
506  if(bufend <= b) { B[bufend] = t; return; }
507  B[b++] = B[a];
508  } while(B[b] < 0);
509  } else if(r > 0) {
510  do {
511  B[a++] = B[c], B[c++] = B[a];
512  if(last <= c) {
513  while(b < bufend) { B[a++] = B[b], B[b++] = B[a]; }
514  B[a] = B[b], B[b] = t;
515  return;
516  }
517  } while(B[c] < 0);
518  } else {
519  B[c] = ~B[c];
520  do {
521  B[a++] = B[b];
522  if(bufend <= b) { B[bufend] = t; return; }
523  B[b++] = B[a];
524  } while(B[b] < 0);
525 
526  do {
527  B[a++] = B[c], B[c++] = B[a];
528  if(last <= c) {
529  while(b < bufend) { B[a++] = B[b], B[b++] = B[a]; }
530  B[a] = B[b], B[b] = t;
531  return;
532  }
533  } while(B[c] < 0);
534  }
535  }
536 }
537 
538 /* Merge-backward with internal buffer. */
539 template<typename buffer_t>
540 inline void ss_mergebackward(const sauchar_t *T, buffer_t& B, saidx_t PA,
541  saidx_t first, saidx_t middle, saidx_t last,
542  saidx_t buf, saidx_t depth) {
543  saidx_t p1, p2;
544  saidx_t a, b, c, bufend;
545  saidx_t t;
546  saint_t r;
547  saint_t x;
548 
549  bufend = buf + (last - middle) - 1;
550  ss_blockswap(B, buf, middle, last - middle);
551 
552  x = 0;
553  if(B[bufend] < 0) { p1 = PA + ~B[bufend]; x |= 1; }
554  else { p1 = PA + B[bufend]; }
555  if(B[middle - 1] < 0) { p2 = PA + ~B[middle - 1]; x |= 2; }
556  else { p2 = PA + B[middle - 1]; }
557  for(t = B[a = last - 1], b = bufend, c = middle - 1;;) {
558  r = ss_compare(T, B, p1, B, p2, depth);
559  if(0 < r) {
560  if(x & 1) { do { B[a--] = B[b], B[b--] = B[a]; } while(B[b] < 0); x ^= 1; }
561  B[a--] = B[b];
562  if(b <= buf) { B[buf] = t; break; }
563  B[b--] = B[a];
564  if(B[b] < 0) { p1 = PA + ~B[b]; x |= 1; }
565  else { p1 = PA + B[b]; }
566  } else if(r < 0) {
567  if(x & 2) { do { B[a--] = B[c], B[c--] = B[a]; } while(B[c] < 0); x ^= 2; }
568  B[a--] = B[c], B[c--] = B[a];
569  if(c < first) {
570  while(buf < b) { B[a--] = B[b], B[b--] = B[a]; }
571  B[a] = B[b], B[b] = t;
572  break;
573  }
574  if(B[c] < 0) { p2 = PA + ~B[c]; x |= 2; }
575  else { p2 = PA + B[c]; }
576  } else {
577  if(x & 1) { do { B[a--] = B[b], B[b--] = B[a]; } while(B[b] < 0); x ^= 1; }
578  B[a--] = ~B[b];
579  if(b <= buf) { B[buf] = t; break; }
580  B[b--] = B[a];
581  if(x & 2) { do { B[a--] = B[c], B[c--] = B[a]; } while(B[c] < 0); x ^= 2; }
582  B[a--] = B[c], B[c--] = B[a];
583  if(c < first) {
584  while(buf < b) { B[a--] = B[b], B[b--] = B[a]; }
585  B[a] = B[b], B[b] = t;
586  break;
587  }
588  if(B[b] < 0) { p1 = PA + ~B[b]; x |= 1; }
589  else { p1 = PA + B[b]; }
590  if(B[c] < 0) { p2 = PA + ~B[c]; x |= 2; }
591  else { p2 = PA + B[c]; }
592  }
593  }
594 }
595 
596 /* D&C based merge. */
597 template<typename buffer_t>
598 inline void ss_swapmerge(const sauchar_t *T, buffer_t& B, saidx_t PA,
599  saidx_t first, saidx_t middle, saidx_t last,
600  saidx_t buf, saidx_t bufsize, saidx_t depth) {
601 #define STACK_SIZE SS_SMERGE_STACKSIZE
602 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
603 #define MERGE_CHECK(a, b, c)\
604  do {\
605  if(((c) & 1) ||\
606  (((c) & 2) && (ss_compare(T, B, PA + GETIDX(B[(a) - 1]), B, PA + B[a], depth) == 0))) {\
607  B[a] = ~B[a];\
608  }\
609  if(((c) & 4) && ((ss_compare(T, B, PA + GETIDX(B[(b) - 1]), B, PA + B[b], depth) == 0))) {\
610  B[b] = ~B[b];\
611  }\
612  } while(0)
613  struct { saidx_t a, b, c; saint_t d; } stack[STACK_SIZE];
614  saidx_t l, r, lm, rm;
615  saidx_t m, len, half;
616  saint_t ssize;
617  saint_t check, next;
618 
619  for(check = 0, ssize = 0;;) {
620  if((last - middle) <= bufsize) {
621  if((first < middle) && (middle < last)) {
622  ss_mergebackward(T, B, PA, first, middle, last, buf, depth);
623  }
624  MERGE_CHECK(first, last, check);
625  STACK_POP(first, middle, last, check);
626  continue;
627  }
628 
629  if((middle - first) <= bufsize) {
630  if(first < middle) {
631  ss_mergeforward(T, B, PA, first, middle, last, buf, depth);
632  }
633  MERGE_CHECK(first, last, check);
634  STACK_POP(first, middle, last, check);
635  continue;
636  }
637 
638  for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
639  0 < len;
640  len = half, half >>= 1) {
641  if(ss_compare(T, B, PA + GETIDX(B[middle + m + half]),
642  B, PA + GETIDX(B[middle - m - half - 1]), depth) < 0) {
643  m += half + 1;
644  half -= (len & 1) ^ 1;
645  }
646  }
647 
648  if(0 < m) {
649  lm = middle - m, rm = middle + m;
650  ss_blockswap(B, lm, middle, m);
651  l = r = middle, next = 0;
652  if(rm < last) {
653  if(B[rm] < 0) {
654  B[rm] = ~B[rm];
655  if(first < lm) { for(; B[--l] < 0;) { } next |= 4; }
656  next |= 1;
657  } else if(first < lm) {
658  for(; B[r] < 0; ++r) { }
659  next |= 2;
660  }
661  }
662 
663  if((l - first) <= (last - r)) {
664  STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
665  middle = lm, last = l, check = (check & 3) | (next & 4);
666  } else {
667  if((next & 2) && (r == middle)) { next ^= 6; }
668  STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
669  first = r, middle = rm, check = (next & 3) | (check & 4);
670  }
671  } else {
672  if(ss_compare(T, B, PA + GETIDX(B[middle - 1]), B, PA + B[middle], depth) == 0) {
673  B[middle] = ~B[middle];
674  }
675  MERGE_CHECK(first, last, check);
676  STACK_POP(first, middle, last, check);
677  }
678  }
679 #undef STACK_SIZE
680 }
681 
682 #endif /* SS_BLOCKSIZE != 0 */
683 
684 
685 /*---------------------------------------------------------------------------*/
686 
687 /*- Function -*/
688 
689 /* Substring sort */
690 template<typename buffer_t>
691 inline void sssort(const sauchar_t *T, buffer_t& B, saidx_t PA,
692  saidx_t first, saidx_t last,
693  saidx_t buf, saidx_t bufsize,
694  saidx_t depth, saidx_t n, saint_t lastsuffix) {
695  saidx_t a;
696 #if SS_BLOCKSIZE != 0
697  saidx_t b, middle, curbuf;
698  saidx_t j, k, curbufsize, limit;
699 #endif
700  saidx_t i;
701 
702  if(lastsuffix != 0) { ++first; }
703 
704 #if SS_BLOCKSIZE == 0
705  ss_mintrosort(T, B, PA, first, last, depth);
706 #else
707  if((bufsize < SS_BLOCKSIZE) &&
708  (bufsize < (last - first)) &&
709  (bufsize < (limit = ss_isqrt(last - first)))) {
710  if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
711  buf = middle = last - limit, bufsize = limit;
712  } else {
713  middle = last, limit = 0;
714  }
715  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
716 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
717  ss_mintrosort(T, B, PA, a, a + SS_BLOCKSIZE, depth);
718 #elif 1 < SS_BLOCKSIZE
719  ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
720 #endif
721  curbufsize = last - (a + SS_BLOCKSIZE);
722  curbuf = a + SS_BLOCKSIZE;
723  if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
724  for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
725  ss_swapmerge(T, B, PA, b - k, b, b + k, curbuf, curbufsize, depth);
726  }
727  }
728 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
729  ss_mintrosort(T, B, PA, a, middle, depth);
730 #elif 1 < SS_BLOCKSIZE
731  ss_insertionsort(T, PA, a, middle, depth);
732 #endif
733  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
734  if(i & 1) {
735  ss_swapmerge(T, B, PA, a - k, a, middle, buf, bufsize, depth);
736  a -= k;
737  }
738  }
739  if(limit != 0) {
740 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
741  ss_mintrosort(T, B, PA, middle, last, depth);
742 #elif 1 < SS_BLOCKSIZE
743  ss_insertionsort(T, B, PA, middle, last, depth);
744 #endif
745  ss_inplacemerge(T, B, PA, first, middle, last, depth);
746  }
747 #endif
748 
749  if(lastsuffix != 0) {
750  /* Insert last type B* suffix. */
751  saidx_t PAi[2]; PAi[0] = B[PA+B[first - 1]], PAi[1] = n - 2;
752  for(a = first, i = B[first - 1];
753  (a < last) && ((B[a] < 0) || (0 < ss_compare(T, PAi, 0, B, PA + B[a], depth)));
754  ++a) {
755  B[a - 1] = B[a];
756  }
757  B[a - 1] = i;
758  }
759 }
760 
761 }} //ns
763 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
len_compact_t len
Definition: LZSSFactors.hpp:38
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11