21 inline size_t search_pos()
const {
return m_search_pos; }
28 using cedar_t = ::cedar::da<cedar_factorid_t>;
30 static constexpr uint8_t NULL_ESCAPE_ESCAPE_BYTE = 255;
31 static constexpr uint8_t NULL_ESCAPE_REPLACEMENT_BYTE = 254;
33 static constexpr cedar_factorid_t NO_VALUE =
static_cast<cedar_factorid_t
>(
34 cedar_t::error_code::CEDAR_NO_VALUE);
35 static constexpr cedar_factorid_t NO_PATH =
static_cast<cedar_factorid_t
>(
36 cedar_t::error_code::CEDAR_NO_PATH);
37 static constexpr cedar_factorid_t HIDDEN_ESCAPE_ID = -3;
39 class LzwRootSearchPosMap {
40 std::array<size_t, 256> m_array;
43 DCHECK(c < m_array.size());
47 DCHECK(c < m_array.size());
53 std::unique_ptr<cedar_t> m_trie;
54 cedar_factorid_t m_ids = 0;
55 LzwRootSearchPosMap m_roots;
60 auto letter = (
const char*) &c;
62 cedar_factorid_t searchResult;
66 searchResult = m_trie->traverse(letter, from, pos, 1);
69 if(searchResult != NO_VALUE && searchResult != NO_PATH) {
79 m_trie->update(letter, from, pos, 1, ++m_ids);
81 m_trie->update(letter, from, pos, 1, HIDDEN_ESCAPE_ID);
92 inline void _print(
size_t from,
size_t ind) {
95 bool prev_empty =
false;
96 size_t prev_empty_min = 1;
97 size_t prev_empty_max = 1;
99 auto print_prev_empty = [&]() {
112 if (prev_empty_min != prev_empty_max) {
134 for (
size_t i = 1; i < 256; i++) {
135 auto child_from = from;
136 const char c = uint8_t(i);
138 auto r = t.traverse(&c, child_from, pos, 1);
139 if (r != NO_PATH && r != NO_VALUE) {
141 prev_empty_min = i + 1;
154 _print(child_from, ind + 4);
163 inline void print() {
171 Meta m(
"lz78trie",
"cedar",
"Lempel-Ziv 78 Cedar Trie");
178 , m_trie(
std::make_unique<cedar_t>()) {}
181 cedar_factorid_t ids = c;
182 DCHECK(m_ids == ids);
187 if (c != 0 && c != NULL_ESCAPE_ESCAPE_BYTE) {
188 const char* letter = (
const char*) &c;
191 m_trie->update(letter, from, pos, 1, ids);
193 search_pos =
size_t{ from };
201 auto null_escape_byte = NULL_ESCAPE_ESCAPE_BYTE;
202 letter = (
const char*) &null_escape_byte;
203 auto res = m_trie->traverse(letter, from, pos, 1);
205 if (res == NO_PATH || res == NO_VALUE) {
207 m_trie->update(letter, from, pos, 1, cedar_factorid_t(HIDDEN_ESCAPE_ID));
213 if (c == 0) c2 = NULL_ESCAPE_REPLACEMENT_BYTE;
214 letter = (
const char*) &c2;
215 m_trie->update(letter, from, pos, 1, ids);
218 search_pos =
size_t{ from };
220 auto r =
node_t(ids,
true, search_pos);
221 m_roots.set(c, search_pos);
234 return node_t(c,
false, m_roots.get(c));
240 m_trie = std::make_unique<cedar_t>();
242 m_roots = LzwRootSearchPosMap();
255 auto r1 = _find_or_insert(parent, NULL_ESCAPE_ESCAPE_BYTE,
false);
256 auto r2 = _find_or_insert(r1, NULL_ESCAPE_REPLACEMENT_BYTE,
true);
258 }
else if (c == NULL_ESCAPE_ESCAPE_BYTE) {
259 auto r1 = _find_or_insert(parent, NULL_ESCAPE_ESCAPE_BYTE,
false);
260 auto r2 = _find_or_insert(r1, NULL_ESCAPE_ESCAPE_BYTE,
true);
263 r = _find_or_insert(parent, c,
true);
node_t find_or_insert(const node_t &parent, uliteral_t c)
Contains the text compression and encoding framework.
uint8_t uliteral_t
Type to represent signed single literals.
CedarTrieNode(factorid_t id, bool is_new, size_t search_pos)
node_t get_rootnode(uliteral_t c) const
Default return type of find_or_insert.
CedarTrie(Env &&env, const size_t n, const size_t &remaining_characters, factorid_t=0)
size_t search_pos() const
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Local environment for a compression/encoding/decompression call.
Interface for algorithms.
node_t add_rootnode(const uliteral_t c)