tudocomp
– The TU Dortmund Compression Framework
TernaryTrie.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <vector>
5 #include <tudocomp/Algorithm.hpp>
7 
8 namespace tdc {
9 namespace lz78 {
10 
16 class TernaryTrie : public Algorithm, public LZ78Trie<> {
17 
18  /*
19  * The trie is not stored in standard form. Each node stores the pointer to its first child (first as first come first served).
20  * The other children are stored in left_sibling/right_sibling of the first child (structured as a binary tree where the first child is the root, and the binary tree is sorted by the character of the trie edge)
21  */
22  std::vector<factorid_t> first_child;
23  std::vector<factorid_t> left_sibling;
24  std::vector<factorid_t> right_sibling;
25  std::vector<uliteral_t> literal;
26 
27 public:
28  inline static Meta meta() {
29  Meta m("lz78trie", "ternary", "Lempel-Ziv 78 Ternary Trie");
30  return m;
31  }
32 
33  //remaining_characters is the number of remaining characters until the complete text is parsed
34  inline TernaryTrie(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t reserve = 0)
35  : Algorithm(std::move(env))
36  , LZ78Trie(n, remaining_characters)
37  {
38  if(reserve > 0) {
39  first_child.reserve(reserve);
40  left_sibling.reserve(reserve);
41  right_sibling.reserve(reserve);
42  literal.reserve(reserve);
43  }
44  }
45 
46  IF_STATS(
47  size_t m_resizes = 0;
48  size_t m_specialresizes = 0;
49  )
50 
51  IF_STATS(
52  MoveGuard m_guard;
53  inline ~TernaryTrie() {
54  if (m_guard) {
55  StatPhase::log("resizes", m_resizes);
56  StatPhase::log("special resizes", m_specialresizes);
57  StatPhase::log("table size", first_child.capacity());
58  StatPhase::log("load ratio", first_child.size()*100/first_child.capacity());
59  }
60  }
61  )
62  TernaryTrie(TernaryTrie&& other) = default;
63  TernaryTrie& operator=(TernaryTrie&& other) = default;
64 
66  DCHECK_EQ(c, size());
67  first_child.push_back(undef_id);
68  left_sibling.push_back(undef_id);
69  right_sibling.push_back(undef_id);
70  literal.push_back(c);
71  return node_t(c, true);
72  }
73 
74  inline node_t get_rootnode(uliteral_t c) const {
75  return node_t(c, false);
76  }
77 
78  inline void clear() {
79  first_child.clear();
80  left_sibling.clear();
81  right_sibling.clear();
82  literal.clear();
83  }
84 
85  inline node_t find_or_insert(const node_t& parent_w, uliteral_t c) {
86  auto parent = parent_w.id();
87 
88  const factorid_t newleaf_id = size();
89 
90  DCHECK_LT(parent, size());
91 
92 
93  if(first_child[parent] == undef_id) {
94  first_child[parent] = newleaf_id;
95  } else {
96  factorid_t node = first_child[parent];
97  while(true) { // search the binary tree stored in parent (following left/right siblings)
98  if(c < literal[node]) {
99  if (left_sibling[node] == undef_id) {
100  left_sibling[node] = newleaf_id;
101  break;
102  }
103  else
104  node = left_sibling[node];
105  }
106  else if (c > literal[node]) {
107  if (right_sibling[node] == undef_id) {
108  right_sibling[node] = newleaf_id;
109  break;
110  }
111  else
112  node = right_sibling[node];
113  }
114  else /* c == literal[node] -> node is the node we want to find */ {
115  return node_t(node, false);
116  }
117  }
118  }
119  // do not double the size if we only need fewer space
120  if(first_child.capacity() == first_child.size()) {
121  const size_t newbound = first_child.size()+expected_number_of_remaining_elements(size());
122  if(newbound < first_child.size()*2 ) {
123  first_child.reserve (newbound);
124  left_sibling.reserve (newbound);
125  right_sibling.reserve (newbound);
126  literal.reserve (newbound);
127  IF_STATS(++m_specialresizes);
128  }
129  IF_STATS(++m_resizes);
130  }
131  first_child.push_back(undef_id);
132  left_sibling.push_back(undef_id);
133  right_sibling.push_back(undef_id);
134  literal.push_back(c);
135  return node_t(size() - 1, true);
136  }
137 
138  inline size_t size() const {
139  return first_child.size();
140  }
141 };
142 
143 }} //ns
144 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
LZ78TrieNode node_t
Definition: LZ78Trie.hpp:43
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
TernaryTrie & operator=(TernaryTrie &&other)=default
node_t find_or_insert(const node_t &parent_w, uliteral_t c)
Definition: TernaryTrie.hpp:85
IF_STATS(size_t m_resizes=0;size_t m_specialresizes=0;) IF_STATS(MoveGuard m_guard
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
factorid_t id() const
Definition: LZ78Trie.hpp:32
Default return type of find_or_insert.
Definition: LZ78Trie.hpp:22
node_t get_rootnode(uliteral_t c) const
Definition: TernaryTrie.hpp:74
TernaryTrie(Env &&env, const size_t n, const size_t &remaining_characters, factorid_t reserve=0)
Definition: TernaryTrie.hpp:34
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11
constexpr factorid_t undef_id
Id that can be used for a non-existing factor.
Definition: LZ78Trie.hpp:14
Local environment for a compression/encoding/decompression call.
node_t add_rootnode(uliteral_t c)
Definition: TernaryTrie.hpp:65
Interface for algorithms.
Definition: Algorithm.hpp:15
size_t expected_number_of_remaining_elements(const size_t z) const
Definition: LZ78Trie.hpp:51
LZ78 Trie Implementation based on Julius Pettersson (MIT/Expat License.) and Juha Nieminen&#39;s work...
Definition: TernaryTrie.hpp:16