15 #define STATIC_ASSERT(e, msg) typedef char msg[(e) ? 1 : -1] 21 typedef unsigned char uchar;
22 template <
typename T>
struct NaN {
enum { N1 = -1, N2 = -2 }; };
23 template <>
struct NaN <float> {
enum { N1 = 0x7f800001, N2 = 0x7f800002 }; };
24 static const int MAX_ALLOC_SIZE = 1 << 16;
26 template <
typename value_type,
27 const int NO_VALUE = NaN <value_type>::N1,
28 const int NO_PATH = NaN <value_type>::N2,
29 const bool ORDERED =
true,
30 const int MAX_TRIAL = 1,
31 const size_t NUM_TRACKING_NODES = 0>
33 static_assert(
sizeof (value_type) <=
sizeof (
int),
"value_type size is larger than an int");
35 enum error_code { CEDAR_NO_VALUE = NO_VALUE, CEDAR_NO_PATH = NO_PATH, CEDAR_VALUE_LIMIT = 2147483647 };
36 typedef value_type result_type;
37 struct result_pair_type {
41 struct result_triple_type {
47 union {
int base_; value_type value; };
49 node (
const int base__ = 0,
const int check_ = 0)
50 : base_ (base__), check (check_) {}
51 #ifdef USE_REDUCED_TRIE 52 int base ()
const {
return - (base_ + 1); }
54 int base ()
const {
return base_; }
60 ninfo () : sibling (0), child (0) {}
69 block () : prev (0), next (0), num (256), reject (257), trial (0), ehead (0) {}
71 da () : tracking_node (), _array (0), _ninfo (0), _block (0), _bheadF (0), _bheadC (0), _bheadO (0), _capacity (0), _size (0), _no_delete (
false), _reject () {
74 ~da () { clear (
false); }
75 size_t capacity ()
const {
return static_cast <
size_t> (_capacity); }
76 size_t size ()
const {
return static_cast <
size_t> (_size); }
77 size_t total_size ()
const {
return sizeof (node) * _size; }
78 size_t unit_size ()
const {
return sizeof (node); }
79 size_t nonzero_size ()
const {
81 for (
int to = 0; to < _size; ++to)
82 if (_array[to].check >= 0) ++i;
85 size_t num_keys ()
const {
87 for (
int to = 0; to < _size; ++to)
88 #ifdef USE_REDUCED_TRIE
89 if (_array[to].check >= 0 && _array[to].value >= 0) ++i;
91 if (_array[to].check >= 0 && _array[_array[to].check].base () == to) ++i;
97 T exactMatchSearch (
const char* key)
const 98 {
return exactMatchSearch <T> (key, std::strlen (key)); }
100 T exactMatchSearch (
const char* key,
size_t len,
size_t from = 0)
const {
101 union {
int i; value_type x; } b;
103 b.i = _find (key, from, pos, len);
104 if (b.i == CEDAR_NO_PATH) b.i = CEDAR_NO_VALUE;
106 _set_result (&result, b.x, len, from);
109 template <
typename T>
110 size_t commonPrefixSearch (
const char* key, T* result,
size_t result_len)
const 111 {
return commonPrefixSearch (key, result, result_len, std::strlen (key)); }
112 template <
typename T>
113 size_t commonPrefixSearch (
const char* key, T* result,
size_t result_len,
size_t len,
size_t from = 0)
const {
116 union {
int i; value_type x; } b;
117 b.i = _find (key, from,
pos,
pos + 1);
118 if (b.i == CEDAR_NO_VALUE)
continue;
119 if (b.i == CEDAR_NO_PATH)
return num;
120 if (num < result_len) _set_result (&result[num], b.x,
pos, from);
126 template <
typename T>
127 size_t commonPrefixPredict (
const char* key, T* result,
size_t result_len)
128 {
return commonPrefixPredict (key, result, result_len, std::strlen (key)); }
129 template <
typename T>
130 size_t commonPrefixPredict (
const char* key, T* result,
size_t result_len,
size_t len,
size_t from = 0) {
131 size_t num (0),
pos (0), p (0);
132 if (_find (key, from,
pos, len) == CEDAR_NO_PATH)
return 0;
133 union {
int i; value_type x; } b;
135 for (b.i = begin (from, p); b.i != CEDAR_NO_PATH; b.i = next (from, p, root)) {
136 if (num < result_len) _set_result (&result[num], b.x, p, from);
141 void suffix (
char* key,
size_t len,
size_t to)
const {
144 const int from = _array[to].check;
146 = static_cast <
char> (_array[from].base () ^ static_cast <
int> (to));
147 to = static_cast <
size_t> (from);
150 value_type traverse (
const char* key,
size_t& from,
size_t&
pos)
const 151 {
return traverse (key, from, pos, std::strlen (key)); }
152 value_type traverse (
const char* key,
size_t& from,
size_t& pos,
size_t len)
const {
153 union {
int i; value_type x; } b;
154 b.i = _find (key, from, pos, len);
157 struct empty_callback {
void operator () (
const int,
const int) {} };
158 value_type& update (
const char* key)
159 {
return update (key, std::strlen (key)); }
160 value_type& update (
const char* key,
size_t len, value_type val = value_type (0))
161 {
size_t from (0),
pos (0);
return update (key, from, pos, len, val); }
162 value_type& update (
const char* key,
size_t& from,
size_t& pos,
size_t len, value_type val = value_type (0))
163 { empty_callback cf;
return update (key, from, pos, len, val, cf); }
164 template <
typename T>
165 value_type& update (
const char* key,
size_t& from,
size_t& pos,
size_t len, value_type val, T& cf) {
167 _err (__FILE__, __LINE__,
"failed to insert zero-length key\n");
168 #ifndef USE_FAST_LOAD 169 if (! _ninfo || ! _block) restore ();
171 for (
const uchar*
const key_ = reinterpret_cast <const uchar*> (key);
173 #ifdef USE_REDUCED_TRIE 174 const value_type val_ = _array[from].value;
175 if (val_ >= 0 && val_ != CEDAR_VALUE_LIMIT)
176 {
const int to = _follow (from, 0, cf); _array[to].value = val_; }
178 from = static_cast <
size_t> (_follow (from, key_[pos], cf));
180 #ifdef USE_REDUCED_TRIE 181 const int to = _array[from].value >= 0 ? static_cast <
int> (from) : _follow (from, 0, cf);
182 if (_array[to].value == CEDAR_VALUE_LIMIT) _array[to].value = 0;
184 const int to = _follow (from, 0, cf);
186 return _array[to].value += val;
189 int erase (
const char* key) {
return erase (key, std::strlen (key)); }
190 int erase (
const char* key,
size_t len,
size_t from = 0) {
192 const int i = _find (key, from, pos, len);
193 if (i == CEDAR_NO_PATH || i == CEDAR_NO_VALUE)
return -1;
197 void erase (
size_t from) {
199 #ifdef USE_REDUCED_TRIE 200 int e = _array[from].value >= 0 ? static_cast <
int> (from) : _array[from].base () ^ 0;
201 from = static_cast <
size_t> (_array[e].check);
203 int e = _array[from].base () ^ 0;
207 const node& n = _array[from];
208 flag = _ninfo[n.base () ^ _ninfo[from].child].sibling;
209 if (flag) _pop_sibling (from, n.base (), static_cast <uchar> (n.base () ^ e));
211 e = static_cast <
int> (from);
212 from = static_cast <
size_t> (_array[from].check);
215 int build (
size_t num,
const char** key,
const size_t* len = 0,
const value_type* val = 0) {
216 for (
size_t i = 0; i < num; ++i)
217 update (key[i], len ? len[i] : std::strlen (key[i]), val ? val[i] : value_type (i));
220 template <
typename T>
221 void dump (T* result,
const size_t result_len) {
222 union {
int i; value_type x; } b;
223 size_t num (0), from (0), p (0);
224 for (b.i = begin (from, p); b.i != CEDAR_NO_PATH; b.i = next (from, p))
225 if (num < result_len)
226 _set_result (&result[num++], b.x, p, from);
228 _err (__FILE__, __LINE__,
"dump() needs array of length = num_keys()\n");
230 int save (
const char* fn,
const char* mode =
"wb")
const {
232 FILE* fp = std::fopen (fn, mode);
234 std::fwrite (_array,
sizeof (node), static_cast <size_t> (_size), fp);
237 const char*
const info
238 = std::strcat (std::strcpy (
new char[std::strlen (fn) + 5], fn),
".sbl");
239 fp = std::fopen (info, mode);
242 std::fwrite (&_bheadF,
sizeof (
int), 1, fp);
243 std::fwrite (&_bheadC,
sizeof (
int), 1, fp);
244 std::fwrite (&_bheadO,
sizeof (
int), 1, fp);
245 std::fwrite (_ninfo,
sizeof (ninfo), static_cast <size_t> (_size), fp);
246 std::fwrite (_block,
sizeof (block), static_cast <size_t> (_size >> 8), fp);
251 int open (
const char* fn,
const char* mode =
"rb",
252 const size_t offset = 0,
size_t size_ = 0) {
253 FILE* fp = std::fopen (fn, mode);
257 if (std::fseek (fp, 0, SEEK_END) != 0)
return -1;
258 size_ = static_cast <
size_t> (std::ftell (fp));
259 if (std::fseek (fp, 0, SEEK_SET) != 0)
return -1;
261 if (size_ <= offset)
return -1;
264 size_ = (size_ - offset) /
sizeof (node);
265 if (std::fseek (fp, static_cast <long> (offset), SEEK_SET) != 0)
return -1;
266 _array = static_cast <node*> (std::malloc (
sizeof (node) * size_));
268 _ninfo = static_cast <ninfo*> (std::malloc (
sizeof (ninfo) * size_));
269 _block = static_cast <block*> (std::malloc (
sizeof (block) * size_));
270 if (! _array || ! _ninfo || ! _block)
274 _err (__FILE__, __LINE__,
"memory allocation failed\n");
275 if (size_ != std::fread (_array,
sizeof (node), size_, fp))
return -1;
277 _size = static_cast <
int> (size_);
279 const char*
const info
280 = std::strcat (std::strcpy (
new char[std::strlen (fn) + 5], fn),
".sbl");
281 fp = std::fopen (info, mode);
284 std::fread (&_bheadF,
sizeof (
int), 1, fp);
285 std::fread (&_bheadC,
sizeof (
int), 1, fp);
286 std::fread (&_bheadO,
sizeof (
int), 1, fp);
287 if (size_ != std::fread (_ninfo,
sizeof (ninfo), size_, fp) ||
288 size_ != std::fread (_block,
sizeof (block), size_ >> 8, fp) << 8)
295 #ifndef USE_FAST_LOAD 297 if (! _block) _restore_block ();
298 if (! _ninfo) _restore_ninfo ();
302 void set_array (
void* p,
size_t size_ = 0) {
304 _array = static_cast <node*> (p);
305 _size = static_cast <
int> (size_);
308 const void* array ()
const {
return _array; }
309 void clear (
const bool reuse =
true) {
310 if (_array && ! _no_delete) { std::free (_array); _array = 0; }
311 if (_ninfo) { std::free (_ninfo); _ninfo = 0; }
312 if (_block) { std::free (_block); _block = 0; }
313 _bheadF = _bheadC = _bheadO = _capacity = _size = 0;
314 if (reuse) _initialize ();
318 int begin (
size_t& from,
size_t& len) {
319 #ifndef USE_FAST_LOAD 320 if (! _ninfo) _restore_ninfo ();
322 int base = _array[from].base ();
323 uchar c = _ninfo[from].child;
324 if (! from && ! (c = _ninfo[base ^ c].sibling))
325 return CEDAR_NO_PATH;
327 from = static_cast <
size_t> (_array[from].base ()) ^ c;
328 c = _ninfo[from].child;
330 #ifdef USE_REDUCED_TRIE 331 if (_array[from].value >= 0)
return _array[from].value;
333 return _array[_array[from].base () ^ c].base_;
336 int next (
size_t& from,
size_t& len,
const size_t root = 0) {
338 #ifdef USE_REDUCED_TRIE 339 if (_array[from].value < 0)
341 c = _ninfo[_array[from].base () ^ 0].sibling;
342 for (; ! c && from != root; --
len) {
343 c = _ninfo[from].sibling;
344 from = static_cast <
size_t> (_array[from].check);
347 begin (from = static_cast <size_t> (_array[from].base ()) ^ c, ++len) :
351 void test (
const size_t from = 0)
const {
352 const int base = _array[from].base ();
353 uchar c = _ninfo[from].child;
355 if (from) assert (_array[base ^ c].check == static_cast <int> (from));
356 if (c && _array[base ^ c].value < 0)
357 test (static_cast <size_t> (base ^ c));
358 }
while ((c = _ninfo[base ^ c].sibling));
360 size_t tracking_node[NUM_TRACKING_NODES + 1];
376 static void _err (
const char* fn,
const int ln,
const char* msg)
377 { std::fprintf (stderr,
"cedar: %s [%d]: %s", fn, ln, msg); std::exit (1); }
378 template <
typename T>
379 static void _realloc_array (T*& p,
const int size_n,
const int size_p = 0) {
380 void* tmp = std::realloc (p,
sizeof (T) * static_cast <size_t> (size_n));
382 std::free (p), _err (__FILE__, __LINE__,
"memory reallocation failed\n");
383 p = static_cast <T*> (tmp);
384 static const T T0 = T ();
385 for (T* q (p + size_p), *
const r (p + size_n); q != r; ++q) *q = T0;
387 void _initialize () {
388 _realloc_array (_array, 256, 256);
389 _realloc_array (_ninfo, 256);
390 _realloc_array (_block, 1);
391 #ifdef USE_REDUCED_TRIE 392 _array[0] = node (-1, -1);
394 _array[0] = node (0, -1);
396 for (
int i = 1; i < 256; ++i)
397 _array[i] = node (i == 1 ? -255 : - (i - 1), i == 255 ? -1 : - (i + 1));
399 _capacity = _size = 256;
400 for (
size_t i = 0 ; i <= NUM_TRACKING_NODES; ++i) tracking_node[i] = 0;
401 for (
short i = 0; i <= 256; ++i) _reject[i] = i + 1;
404 template <
typename T>
405 int _follow (
size_t& from,
const uchar&
label, T& cf) {
407 const int base = _array[from].base ();
408 if (base < 0 || _array[to = base ^ label].check < 0) {
409 to = _pop_enode (base, label, static_cast <int> (from));
410 _push_sibling (from, to ^ label, label, base >= 0);
411 }
else if (_array[to].check != static_cast <int> (from))
412 to = _resolve (from, base, label, cf);
416 int _find (
const char* key,
size_t& from,
size_t& pos,
const size_t len)
const {
417 for (
const uchar*
const key_ = reinterpret_cast <const uchar*> (key);
419 #ifdef USE_REDUCED_TRIE 420 if (_array[from].value >= 0)
break;
422 size_t to = static_cast <
size_t> (_array[from].base ()); to ^= key_[
pos];
423 if (_array[to].check != static_cast <int> (from))
return CEDAR_NO_PATH;
427 #ifdef USE_REDUCED_TRIE 428 if (_array[from].value >= 0)
429 return pos == len ? _array[from].value : CEDAR_NO_PATH;
431 const node n = _array[_array[from].base () ^ 0];
432 if (n.check != static_cast <int> (from))
return CEDAR_NO_VALUE;
435 #ifndef USE_FAST_LOAD 436 void _restore_ninfo () {
437 _realloc_array (_ninfo, _size);
438 for (
int to = 0; to < _size; ++to) {
439 const int from = _array[to].check;
440 if (from < 0)
continue;
441 const int base = _array[from].base ();
442 if (
const uchar label = static_cast <uchar> (base ^ to))
443 _push_sibling (static_cast <size_t> (from), base, label,
444 ! from || _ninfo[from].child || _array[base ^ 0].check == from);
447 void _restore_block () {
448 _realloc_array (_block, _size >> 8);
449 _bheadF = _bheadC = _bheadO = 0;
450 for (
int bi (0), e (0); e < _size; ++bi) {
451 block& b = _block[bi];
453 for (; e < (bi << 8) + 256; ++e)
454 if (_array[e].check < 0 && ++b.num == 1) b.ehead = e;
455 int& head_out = b.num == 1 ? _bheadC : (b.num == 0 ? _bheadF : _bheadO);
456 _push_block (bi, head_out, ! head_out && b.num);
460 void _set_result (result_type* x, value_type r,
size_t = 0,
size_t = 0)
const 462 void _set_result (result_pair_type* x, value_type r,
size_t l,
size_t = 0)
const 463 { x->value = r; x->length = l; }
464 void _set_result (result_triple_type* x, value_type r,
size_t l,
size_t from)
const 465 { x->value = r; x->length = l; x->id = from; }
466 void _pop_block (
const int bi,
int& head_in,
const bool last) {
470 const block& b = _block[bi];
471 _block[b.prev].next = b.next;
472 _block[b.next].prev = b.prev;
473 if (bi == head_in) head_in = b.next;
476 void _push_block (
const int bi,
int& head_out,
const bool empty) {
477 block& b = _block[bi];
479 head_out = b.prev = b.next = bi;
481 int& tail_out = _block[head_out].prev;
484 head_out = tail_out = _block[tail_out].next = bi;
488 if (_size == _capacity) {
490 _capacity += _size >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : _size;
492 _capacity += _capacity;
494 _realloc_array (_array, _capacity, _capacity);
495 _realloc_array (_ninfo, _capacity, _size);
496 _realloc_array (_block, _capacity >> 8, _size >> 8);
498 _block[_size >> 8].ehead = _size;
499 _array[_size] = node (- (_size + 255), - (_size + 1));
500 for (
int i = _size + 1; i < _size + 255; ++i)
501 _array[i] = node (-(i - 1), -(i + 1));
502 _array[_size + 255] = node (- (_size + 254), -_size);
503 _push_block (_size >> 8, _bheadO, ! _bheadO);
505 return (_size >> 8) - 1;
508 void _transfer_block (
const int bi,
int& head_in,
int& head_out) {
509 _pop_block (bi, head_in, bi == _block[bi].next);
510 _push_block (bi, head_out, ! head_out && _block[bi].num);
513 int _pop_enode (
const int base,
const uchar label,
const int from) {
514 const int e = base < 0 ? _find_place () : base ^
label;
515 const int bi = e >> 8;
517 block& b = _block[bi];
519 if (bi) _transfer_block (bi, _bheadC, _bheadF);
521 _array[-n.base_].check = n.check;
522 _array[-n.check].base_ = n.base_;
523 if (e == b.ehead) b.ehead = -n.check;
524 if (bi && b.num == 1 && b.trial != MAX_TRIAL)
525 _transfer_block (bi, _bheadO, _bheadC);
528 #ifdef USE_REDUCED_TRIE 529 n.value = CEDAR_VALUE_LIMIT; n.check = from;
530 if (base < 0) _array[from].base_ = - (e ^
label) - 1;
532 if (label) n.base_ = -1;
else n.value = value_type (0); n.check = from;
533 if (base < 0) _array[from].base_ = e ^
label;
538 void _push_enode (
const int e) {
539 const int bi = e >> 8;
540 block& b = _block[bi];
543 _array[e] = node (-e, -e);
544 if (bi) _transfer_block (bi, _bheadF, _bheadC);
546 const int prev = b.ehead;
547 const int next = -_array[prev].check;
548 _array[e] = node (-prev, -next);
549 _array[prev].check = _array[next].base_ = -e;
550 if (b.num == 2 || b.trial == MAX_TRIAL)
551 if (bi) _transfer_block (bi, _bheadC, _bheadO);
554 if (b.reject < _reject[b.num]) b.reject = _reject[b.num];
555 _ninfo[e] = ninfo ();
558 void _push_sibling (
const size_t from,
const int base,
const uchar label,
const bool flag =
true) {
559 uchar* c = &_ninfo[from].child;
560 if (flag && (ORDERED ? label > *c : ! *c))
561 do c = &_ninfo[base ^ *c].sibling;
while (ORDERED && *c && *c < label);
562 _ninfo[base ^
label].sibling = *c, *c =
label;
565 void _pop_sibling (
const size_t from,
const int base,
const uchar label) {
566 uchar* c = &_ninfo[from].child;
567 while (*c != label) c = &_ninfo[base ^ *c].sibling;
568 *c = _ninfo[base ^
label].sibling;
571 bool _consult (
const int base_n,
const int base_p, uchar c_n, uchar c_p)
const {
572 do c_n = _ninfo[base_n ^ c_n].sibling, c_p = _ninfo[base_p ^ c_p].sibling;
577 uchar* _set_child (uchar* p,
const int base, uchar c,
const int label = -1) {
579 if (! c) { *++p = c; c = _ninfo[base ^ c].sibling; }
581 while (c && c < label) { *++p = c; c = _ninfo[base ^ c].sibling; }
582 if (label != -1) *++p = static_cast <uchar> (
label);
583 while (c) { *++p = c; c = _ninfo[base ^ c].sibling; }
588 if (_bheadC)
return _block[_bheadC].ehead;
589 if (_bheadO)
return _block[_bheadO].ehead;
590 return _add_block () << 8;
592 int _find_place (
const uchar*
const first,
const uchar*
const last) {
593 if (
int bi = _bheadO) {
594 const int bz = _block[_bheadO].prev;
595 const short nc = static_cast <
short> (last - first + 1);
597 block& b = _block[bi];
598 if (b.num >= nc && nc < b.reject)
599 for (
int e = b.ehead;;) {
600 const int base = e ^ *first;
601 for (
const uchar* p = first; _array[base ^ *++p].check < 0; )
602 if (p == last)
return b.ehead = e;
603 if ((e = -_array[e].check) == b.ehead)
break;
606 if (b.reject < _reject[b.num]) _reject[b.num] = b.reject;
607 const int bi_ = b.next;
608 if (++b.trial == MAX_TRIAL) _transfer_block (bi, _bheadO, _bheadC);
613 return _add_block () << 8;
616 template <
typename T>
617 int _resolve (
size_t& from_n,
const int base_n,
const uchar label_n, T& cf) {
619 const int to_pn = base_n ^ label_n;
620 const int from_p = _array[to_pn].check;
621 const int base_p = _array[from_p].base ();
623 = _consult (base_n, base_p, _ninfo[from_n].child, _ninfo[from_p].child);
625 uchar*
const first = &child[0];
627 flag ? _set_child (first, base_n, _ninfo[from_n].child, label_n)
628 : _set_child (first, base_p, _ninfo[from_p].child);
630 (first == last ? _find_place () : _find_place (first, last)) ^ *first;
632 const int from = flag ? static_cast <
int> (from_n) : from_p;
633 const int base_ = flag ? base_n : base_p;
634 if (flag && *first == label_n) _ninfo[from].child = label_n;
635 #ifdef USE_REDUCED_TRIE 636 _array[from].base_ = -base - 1;
638 _array[from].base_ = base;
640 for (
const uchar* p = first; p <= last; ++p) {
641 const int to = _pop_enode (base, *p, from);
642 const int to_ = base_ ^ *p;
643 _ninfo[to].sibling = (p == last ? 0 : *(p + 1));
644 if (flag && to_ == to_pn)
continue;
646 node& n = _array[to];
647 node& n_ = _array[to_];
648 #ifdef USE_REDUCED_TRIE 649 if ((n.base_ = n_.base_) < 0 && *p)
651 if ((n.base_ = n_.base_) > 0 && *p)
654 uchar c = _ninfo[to].child = _ninfo[to_].child;
655 do _array[n.base () ^ c].check = to;
656 while ((c = _ninfo[n.base () ^ c].sibling));
658 if (! flag && to_ == static_cast <int> (from_n))
659 from_n = static_cast <size_t> (to);
660 if (! flag && to_ == to_pn) {
661 _push_sibling (from_n, to_pn ^ label_n, label_n);
662 _ninfo[to_].child = 0;
663 #ifdef USE_REDUCED_TRIE 664 n_.value = CEDAR_VALUE_LIMIT;
666 if (label_n) n_.base_ = -1;
else n_.value = value_type (0);
668 n_.check = static_cast <
int> (from_n);
671 if (NUM_TRACKING_NODES)
672 for (
size_t j = 0; tracking_node[j] != 0; ++j)
673 if (tracking_node[j] == static_cast <size_t> (to_))
674 { tracking_node[j] = static_cast <
size_t> (to);
break; }
676 return flag ? base ^ label_n : to_pn;
uint_impl_t & operator=(const uint_impl_t &b)
uint64_t label(uint64_t left, uint64_t right)