tudocomp
– The TU Dortmund Compression Framework
HashTriePlus.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/Algorithm.hpp>
4 #include <tudocomp/util/Hash.hpp>
7 
8 namespace tdc {
9 namespace lz78 {
10 
11 
12 template<class HashFunction = MixHasher, class HashManager = SizeManagerDirect>
13 class HashTriePlus : public Algorithm, public LZ78Trie<> {
16 
17 public:
18  inline static Meta meta() {
19  Meta m("lz78trie", "hash_plus", "Hash Trie+");
20  m.option("hash_function").templated<HashFunction, MixHasher>("hash_function");
21  m.option("hash_manager").templated<HashManager, SizeManagerDirect>("hash_manager");
22  m.option("load_factor").dynamic(30);
23  return m;
24  }
25 
26  inline HashTriePlus(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t reserve = 0)
27  : Algorithm(std::move(env))
28  , LZ78Trie(n,remaining_characters)
29  , m_table(this->env(),n,remaining_characters)
30  , m_table2(this->env(),n,remaining_characters)
31  {
32  m_table.max_load_factor(this->env().option("load_factor").as_integer()/100.0f );
33  m_table2.max_load_factor(0.95);
34  if(reserve > 0) {
35  m_table.reserve(reserve);
36  }
37  }
38 
39  IF_STATS(
40  MoveGuard m_guard;
41  inline ~HashTriePlus() {
42  if (m_guard) {
43  if(m_table2.empty()) {
44  m_table.collect_stats(env());
45  } else {
46  m_table2.collect_stats(env());
47  }
48  }
49  }
50  )
51  HashTriePlus(HashTriePlus&& other) = default;
52  HashTriePlus& operator=(HashTriePlus&& other) = default;
53 
55  DCHECK(m_table2.empty());
56  m_table.insert(std::make_pair<squeeze_node_t,factorid_t>(create_node(0, c), size()));
57  return node_t(size() - 1, true);
58  }
59 
60  inline node_t get_rootnode(uliteral_t c) const {
61  return node_t(c, false);
62  }
63 
64  inline void clear() {
65 // table.clear();
66 
67  }
68 
69  inline node_t find_or_insert(const node_t& parent_w, uliteral_t c) {
70  auto parent = parent_w.id();
71  const factorid_t newleaf_id = size();
72  if(!m_table2.empty()) { // already using the second hash table
73  auto ret = m_table2.insert(std::make_pair(create_node(parent,c), newleaf_id));
74  if(ret.second) {
75  return node_t(newleaf_id, true); // added a new node
76  }
77  return node_t(ret.first.value(), false);
78  }
79  // using still the first hash table
80  auto ret = m_table.insert(std::make_pair(create_node(parent,c), newleaf_id));
81  if(ret.second) {
82  if(tdc_unlikely(m_table.table_size()*m_table.max_load_factor() < m_table.m_entries+1)) {
83  const size_t expected_size = (m_table.m_entries + 1 + lz78_expected_number_of_remaining_elements(m_table.entries(),m_table.m_n,m_table.m_remaining_characters))/0.95;
84  if(expected_size < m_table.table_size()*2.0*0.95) {
85  m_table2.incorporate(m_table, expected_size);
86  }
87 
88  }
89  return node_t(newleaf_id, true); // added a new node
90  }
91  return node_t(ret.first.value(), false);
92  }
93 
94  inline size_t size() const {
95  return m_table2.empty() ? m_table.entries() : m_table2.entries();
96  }
97 };
98 
99 }} //ns
100 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
const size_t & m_remaining_characters
Definition: Hash.hpp:413
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
LZ78TrieNode node_t
Definition: LZ78Trie.hpp:43
#define tdc_unlikely(x)
Provides a hint to the compiler that x is expected to resolve to false.
Definition: def.hpp:23
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
std::pair< Iterator, bool > insert(std::pair< key_t, value_t > &&value)
Definition: Hash.hpp:518
void reserve(len_t hint)
Definition: Hash.hpp:461
len_t m_entries
Definition: Hash.hpp:382
node_t get_rootnode(uliteral_t c) const
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
factorid_t id() const
Definition: LZ78Trie.hpp:32
Default return type of find_or_insert.
Definition: LZ78Trie.hpp:22
len_t table_size() const
Definition: Hash.hpp:515
const size_t m_n
Definition: Hash.hpp:412
len_t empty() const
Definition: Hash.hpp:516
HashTriePlus(Env &&env, const size_t n, const size_t &remaining_characters, factorid_t reserve=0)
void incorporate(T &&o, len_t newsize)
Definition: Hash.hpp:430
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11
squeeze_node_t create_node(factorid_t id, uliteral_t c)
node_t add_rootnode(uliteral_t c)
HashTriePlus & operator=(HashTriePlus &&other)=default
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
node_t find_or_insert(const node_t &parent_w, uliteral_t c)
float max_load_factor() const noexcept
Definition: Hash.hpp:409
Local environment for a compression/encoding/decompression call.
len_t entries() const
Definition: Hash.hpp:514
Interface for algorithms.
Definition: Algorithm.hpp:15
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150
IF_STATS(MoveGuard m_guard;inline ~HashTriePlus() { if(m_guard) { if(m_table2.empty()) { m_table.collect_stats(env());} else { m_table2.collect_stats(env());} } }) HashTriePlus(HashTriePlus &&other)=default