tudocomp
– The TU Dortmund Compression Framework
LZ78Trie.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <limits>
4 #include <cstddef>
5 #include <cstdint>
6 #include <tudocomp/def.hpp>
7 namespace tdc {
8 namespace lz78 {
9 
11 using factorid_t = uint32_t;
12 
14 constexpr factorid_t undef_id = std::numeric_limits<factorid_t>::max();
15 
17 constexpr size_t DMS_MAX = std::numeric_limits<factorid_t>::max();
18 
19 // NB: Also update the Lz78 chapter in the docs in case of changes to this file
20 
22 class LZ78TrieNode {
23  factorid_t m_id;
24  bool m_is_new;
25 public:
26  inline LZ78TrieNode(factorid_t id, bool is_new):
27  m_id(id), m_is_new(is_new) {}
28  inline LZ78TrieNode():
29  LZ78TrieNode(0, false) {}
30 
31  inline bool is_new() const { return m_is_new; }
32  inline factorid_t id() const { return m_id; }
33 };
34 
35 #define LZ78_DICT_SIZE_DESC \
36  "`dict_size` has to either be 0 (unlimited), or a positive integer,\n" \
37  "and determines the maximum size of the backing storage of\n" \
38  "the dictionary before it gets reset."
39 
40 template<typename _node_t = LZ78TrieNode>
41 class LZ78Trie {
42 public:
43  using node_t = _node_t;
44 private:
45  const size_t m_n;
46  const size_t& m_remaining_characters;
47 protected:
48  LZ78Trie(const size_t n, const size_t& remaining_characters)
49  : m_n(n), m_remaining_characters(remaining_characters) {}
50 
51  inline size_t expected_number_of_remaining_elements(const size_t z) const {
52  return lz78_expected_number_of_remaining_elements(z, m_n, m_remaining_characters);
53  }
54 
62  CHECK(false) << "This needs to be implemented by a inheriting class";
63  return 0;
64  }
65 
69  inline node_t get_rootnode(uliteral_t c) const {
70  CHECK(false) << "This needs to be implemented by a inheriting class";
71  return 0;
72  }
73 
78  inline void clear() {
79  CHECK(false) << "This needs to be implemented by a inheriting class";
80  }
81 
87  inline node_t find_or_insert(const node_t& parent, uliteral_t c) {
88  CHECK(false) << "This needs to be implemented by a inheriting class";
89  return 0;
90  }
91 
95  inline size_t size() const {
96  CHECK(false) << "This needs to be implemented by a inheriting class";
97  return 0;
98  }
99 
100 };
101 
102 
103 }}//ns
104 
node_t get_rootnode(uliteral_t c) const
Returns the root node corresponding to literal c.
Definition: LZ78Trie.hpp:69
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
node_t add_rootnode(uliteral_t c)
The dictionary can store multiple root nodes For LZ78, we use a root node with the id = c = 0...
Definition: LZ78Trie.hpp:61
void clear()
Erases the contents of the dictionary.
Definition: LZ78Trie.hpp:78
LZ78Trie(const size_t n, const size_t &remaining_characters)
Definition: LZ78Trie.hpp:48
factorid_t id() const
Definition: LZ78Trie.hpp:32
bool is_new() const
Definition: LZ78Trie.hpp:31
Default return type of find_or_insert.
Definition: LZ78Trie.hpp:22
size_t size() const
Returns the number of entries, plus the number of rootnodes.
Definition: LZ78Trie.hpp:95
LZ78TrieNode(factorid_t id, bool is_new)
Definition: LZ78Trie.hpp:26
constexpr size_t DMS_MAX
Maximum legal dictionary size.
Definition: LZ78Trie.hpp:17
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11
constexpr factorid_t undef_id
Id that can be used for a non-existing factor.
Definition: LZ78Trie.hpp:14
node_t find_or_insert(const node_t &parent, uliteral_t c)
Searches a pair (parent, c).
Definition: LZ78Trie.hpp:87
size_t expected_number_of_remaining_elements(const size_t z) const
Definition: LZ78Trie.hpp:51