tudocomp
– The TU Dortmund Compression Framework
ExtHashTrie.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/Algorithm.hpp>
6 #include <tudocomp/util/Hash.hpp>
8 
9 #include <unordered_map>
10 
11 namespace tdc {
12 namespace lz78 {
13 
14 class ExtHashTrie : public Algorithm, public LZ78Trie<> {
15  typedef std::unordered_map<squeeze_node_t, factorid_t, _VignaHasher> table_t;
16 // typedef rigtorp::HashMap<squeeze_node_t, factorid_t,CLhash> table_t;
17 // typedef ska::flat_hash_map<squeeze_node_t, factorid_t,CLhash> table_t; //ska::power_of_two_std_hash<size_t>> table_t;
18 // typedef ska::flat_hash_map<squeeze_node_t, factorid_t,ska::power_of_two_std_hash<size_t>> table_t;
19 // typedef rigtorp::HashMap<squeeze_node_t, factorid_t,_VignaHasher> table_t;
20 
21  table_t m_table;
22  const size_t m_n;
23  const size_t& m_remaining_characters;
24 
25 public:
26  inline static Meta meta() {
27  Meta m("lz78trie", "exthash", "Hash Trie with external hash table");
28  return m;
29  }
30 
31  inline ExtHashTrie(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t reserve = 0)
32  : Algorithm(std::move(env))
33  , LZ78Trie(n, remaining_characters)
34  , m_n(n)
35  , m_remaining_characters(remaining_characters)
36  {
37  // m_table.max_load_factor(0.9f);
38  if(reserve > 0) {
39  m_table.reserve(reserve);
40  }
41  }
42  // ExtHashTrie(Env&& env, factorid_t reserve = 0) : Algorithm(std::move(env)) {
43  // if(reserve > 0) {
44  // m_table.reserve(reserve);
45  // }
46  // }
47  IF_STATS(
48  MoveGuard m_guard;
49  inline ~ExtHashTrie() {
50  if (m_guard) {
51  StatPhase::log("table size", m_table.bucket_count());
52  StatPhase::log("load factor", m_table.max_load_factor());
53  StatPhase::log("entries", m_table.size());
54  StatPhase::log("load ratio", m_table.load_factor());
55  }
56  }
57  )
58  ExtHashTrie(ExtHashTrie&& other) = default;
59  ExtHashTrie& operator=(ExtHashTrie&& other) = default;
60 
62  m_table.insert(std::make_pair<squeeze_node_t,factorid_t>(create_node(0, c), size()));
63  return node_t(size() - 1, true);
64  }
65 
66  inline node_t get_rootnode(uliteral_t c) const {
67  return node_t(c, false);
68  }
69 
70  inline void clear() {
71  m_table.clear();
72 
73  }
74 
75  inline node_t find_or_insert(const node_t& parent_w, uliteral_t c) {
76  auto parent = parent_w.id();
77  const factorid_t newleaf_id = size();
78 
79  auto ret = m_table.insert(std::make_pair(create_node(parent,c), newleaf_id));
80 
81  if(ret.second) { // added a new node
82  if(tdc_unlikely(m_table.bucket_count()*m_table.max_load_factor() < m_table.size()+1)) {
83  const size_t expected_size = (m_table.size() + 1 + lz78_expected_number_of_remaining_elements(m_table.size(), m_n, m_remaining_characters))/0.95;
84  if(expected_size < m_table.bucket_count()*2.0*0.95) {
85  m_table.reserve(expected_size);
86  }
87  }
88  return node_t(size() - 1, true); // added a new node
89  }
90 
91  return node_t(ret.first->second, false); // return the factor id of that node
92  }
93 
94  inline size_t size() const {
95  return m_table.size();
96  }
97 };
98 
99 }} //ns
100 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
LZ78TrieNode node_t
Definition: LZ78Trie.hpp:43
#define tdc_unlikely(x)
Provides a hint to the compiler that x is expected to resolve to false.
Definition: def.hpp:23
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
node_t find_or_insert(const node_t &parent_w, uliteral_t c)
Definition: ExtHashTrie.hpp:75
node_t add_rootnode(uliteral_t c)
Definition: ExtHashTrie.hpp:61
ExtHashTrie(Env &&env, const size_t n, const size_t &remaining_characters, factorid_t reserve=0)
Definition: ExtHashTrie.hpp:31
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
factorid_t id() const
Definition: LZ78Trie.hpp:32
Default return type of find_or_insert.
Definition: LZ78Trie.hpp:22
node_t get_rootnode(uliteral_t c) const
Definition: ExtHashTrie.hpp:66
ExtHashTrie & operator=(ExtHashTrie &&other)=default
IF_STATS(MoveGuard m_guard;inline ~ExtHashTrie() { if(m_guard) { StatPhase::log("table size", m_table.bucket_count());StatPhase::log("load factor", m_table.max_load_factor());StatPhase::log("entries", m_table.size());StatPhase::log("load ratio", m_table.load_factor());} }) ExtHashTrie(ExtHashTrie &&other)=default
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
size_t size() const
Definition: ExtHashTrie.hpp:94
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11
squeeze_node_t create_node(factorid_t id, uliteral_t c)
Local environment for a compression/encoding/decompression call.
Interface for algorithms.
Definition: Algorithm.hpp:15