41 namespace libdivsufsort {
45 template<
typename buffer_t>
46 inline saidx_t sort_typeBstar(
47 const sauchar_t *T, buffer_t&
SA,
48 saidx_t *bucket_A, saidx_t *bucket_B,
51 saidx_t PAb, ISAb, buf;
52 saidx_t i, j, k, t, m, bufsize;
56 for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
57 for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
62 for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
64 do { ++BUCKET_A(c1 = c0); }
while((0 <= --i) && ((c0 = T[i]) >= c1));
67 ++BUCKET_BSTAR(c0, c1);
70 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
83 for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
86 i = t + BUCKET_B(c0, c0);
87 for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
88 j += BUCKET_BSTAR(c0, c1);
89 BUCKET_BSTAR(c0, c1) = j;
90 i += BUCKET_B(c0, c1);
96 PAb = n - m; ISAb = m;
97 for(i = m - 2; 0 <= i; --i) {
98 t = SA[PAb + i], c0 = T[t], c1 = T[t + 1];
99 SA[--BUCKET_BSTAR(c0, c1)] = i;
101 t = SA[PAb + m - 1], c0 = T[t], c1 = T[t + 1];
102 SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
105 buf = m, bufsize = n - (2 * m);
106 for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
107 for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
108 i = BUCKET_BSTAR(c0, c1);
110 sssort(T, SA, PAb, i, j,
111 buf, bufsize, 2, n, SA[i] == (m - 1));
117 for(i = m - 1; 0 <= i; --i) {
120 do { SA[ISAb + SA[i]] = i; }
while((0 <= --i) && (0 <= SA[i]));
122 if(i <= 0) {
break; }
125 do { SA[i] = ~SA[i]; SA[ISAb + SA[i]] = j; }
while(SA[--i] < 0);
126 SA[ISAb + SA[i]] = j;
130 trsort(SA, ISAb, 0, m, 1);
133 for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
134 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
137 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
138 SA[SA[ISAb + (--j)]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
143 BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n;
144 for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
145 i = BUCKET_A(c0 + 1) - 1;
146 for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
147 t = i - BUCKET_B(c0, c1);
148 BUCKET_B(c0, c1) = i;
151 for(i = t, j = BUCKET_BSTAR(c0, c1);
153 --i, --k) { SA[i] = SA[k]; }
155 BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1;
156 BUCKET_B(c0, c0) = i;
165 template<
typename buffer_t>
166 inline void construct_SA(
167 const sauchar_t *T, buffer_t& SA,
168 saidx_t *bucket_A, saidx_t *bucket_B,
169 saidx_t n, saidx_t m) {
178 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
180 for(i = BUCKET_BSTAR(c1, c1 + 1),
181 j = BUCKET_A(c1 + 1) - 1, k = -1, c2 = -1;
184 if(0 < (s = SA[j])) {
186 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
187 assert(T[s - 1] <= T[s]);
190 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
192 if(0 <= c2) { BUCKET_B(c2, c1) = k; }
194 k = BUCKET_B(c2 = c0, c1);
199 assert(((s == 0) && (T[s] == c1)) || (s < 0));
208 k = BUCKET_A(c2 = T[n - 1]);
209 SA[k++] = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
211 for(i = 0, j = n; i < j; ++i) {
212 if(0 < (s = SA[i])) {
213 assert(T[s - 1] >= T[s]);
215 if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
218 k = BUCKET_A(c2 = c0);
230 template<
typename buffer_t>
231 inline void divsufsort_run(
232 const sauchar_t* T, buffer_t& SA,
233 saidx_t *bucket_A, saidx_t *bucket_B, saidx_t n) {
236 SA[0] = -1; DCHECK(SA[0] < 0) <<
"only signed integer buffers are supported";
238 saidx_t m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
239 construct_SA(T, SA, bucket_A, bucket_B, n, m);
244 inline void divsufsort_run<DynamicIntVector>(
246 saidx_t *bucket_A, saidx_t *bucket_B, saidx_t n) {
248 BufferWrapper<DynamicIntVector> wrapSA(SA);
249 divsufsort_run(T, wrapSA, bucket_A, bucket_B, n);
253 template<
typename buffer_t>
254 inline saint_t divsufsort(
const sauchar_t* T, buffer_t& SA, saidx_t n) {
255 saidx_t *bucket_A, *bucket_B;
260 if((T == NULL) || (n < 0)) {
return -1; }
261 else if(n == 0) {
return 0; }
262 else if(n == 1) { SA[0] = 0;
return 0; }
263 else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1;
return 0; }
265 bucket_A =
new saidx_t[BUCKET_A_SIZE];
266 bucket_B =
new saidx_t[BUCKET_B_SIZE];
269 if((bucket_A != NULL) && (bucket_B != NULL)) {
270 divsufsort_run(T, SA, bucket_A, bucket_B, n);
283 using libdivsufsort::saidx_t;
284 using libdivsufsort::divsufsort;
Contains the text compression and encoding framework.
IntVector< dynamic_t > DynamicIntVector
Represents an integer vector with unspecified (dynamic) bit width.