tudocomp
– The TU Dortmund Compression Framework
RollingTriePlus.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/Algorithm.hpp>
4 #include <tudocomp/util/Hash.hpp>
6 
7 namespace tdc {
8 
9 namespace lz78 {
10 
11 template<
12  typename HashRoller = ZBackupRollingHash,
13  typename HashManager = SizeManagerNoob,
14  typename HashFunction = NoopHasher
15 >
16 class RollingTriePlus : public Algorithm, public LZ78Trie<> {
17  typedef typename HashRoller::key_type key_type;
18  mutable HashRoller m_roller;
21 
22  inline key_type hash_node(uliteral_t c) const {
23  m_roller += c;
24  return m_roller();
25  }
26 
27 public:
28  inline static Meta meta() {
29  Meta m("lz78trie", "rolling_plus", "Rolling Hash Trie+");
30  m.option("hash_roller").templated<HashRoller, ZBackupRollingHash>("hash_roller");
31  m.option("hash_manager").templated<HashManager, SizeManagerNoob>("hash_manager");
32  m.option("hash_function").templated<HashFunction, NoopHasher>("hash_function"); // dummy parameter
33  m.option("load_factor").dynamic(30);
34  return m;
35  }
36  inline RollingTriePlus(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t reserve = 0)
37  : Algorithm(std::move(env))
38  , LZ78Trie(n,remaining_characters)
39  , m_roller(this->env().env_for_option("hash_roller"))
40  , m_table(this->env(), n, remaining_characters)
41  , m_table2(this->env(),n,remaining_characters)
42  {
43  m_table.max_load_factor(this->env().option("load_factor").as_integer()/100.0f );
44  m_table2.max_load_factor(0.95);
45  if(reserve > 0) {
46  m_table.reserve(reserve);
47  }
48  }
49 
50  IF_STATS(
51  MoveGuard m_guard;
52  inline ~RollingTriePlus() {
53  if (m_guard) {
54  if(m_table2.empty()) {
55  m_table.collect_stats(env());
56  } else {
57  m_table2.collect_stats(env());
58  }
59  }
60  }
61  )
62  RollingTriePlus(RollingTriePlus&& other) = default;
63  RollingTriePlus& operator=(RollingTriePlus&& other) = default;
64 
66  m_table.insert(std::make_pair<key_type,factorid_t>(hash_node(c), size()));
67  m_roller.clear();
68  return node_t(size() - 1, true);
69  }
70 
71  inline node_t get_rootnode(uliteral_t c) const {
72  hash_node(c);
73  return node_t(c, false);
74  }
75 
76  inline void clear() {
77 // m_table.clear();
78  }
79 
80  inline node_t find_or_insert(const node_t&, uliteral_t c) {
81  const factorid_t newleaf_id = size();
82 
83 
84 
85  if(!m_table2.empty()) { // already using the second hash table
86  auto ret = m_table2.insert(std::make_pair(hash_node(c), newleaf_id));
87  if(ret.second) {
88  m_roller.clear();
89  return node_t(newleaf_id, true); // added a new node
90  }
91  return node_t(ret.first.value(), false);
92 
93  }
94  // using still the first hash table
95  auto ret = m_table.insert(std::make_pair(hash_node(c), newleaf_id));
96  if(ret.second) {
97  if(tdc_unlikely(m_table.table_size()*m_table.max_load_factor() < m_table.m_entries+1)) {
98  const size_t expected_size = (m_table.m_entries + 1 + lz78_expected_number_of_remaining_elements(m_table.entries(),m_table.m_n,m_table.m_remaining_characters))/0.95;
99  if(expected_size < m_table.table_size()*2.0*0.95) {
100  m_table2.incorporate(m_table, expected_size);
101  }
102 
103  }
104  m_roller.clear();
105  return node_t(newleaf_id, true); // added a new node
106  }
107  return node_t(ret.first.value(), false);
108 
109 
110  }
111 
112  inline factorid_t size() const {
113  return m_table2.empty() ? m_table.entries() : m_table2.entries();
114  }
115 };
116 
117 }} //ns
118 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
const size_t & m_remaining_characters
Definition: Hash.hpp:413
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
LZ78TrieNode node_t
Definition: LZ78Trie.hpp:43
#define tdc_unlikely(x)
Provides a hint to the compiler that x is expected to resolve to false.
Definition: def.hpp:23
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
std::pair< Iterator, bool > insert(std::pair< key_t, value_t > &&value)
Definition: Hash.hpp:518
void reserve(len_t hint)
Definition: Hash.hpp:461
len_t m_entries
Definition: Hash.hpp:382
node_t get_rootnode(uliteral_t c) const
node_t add_rootnode(uliteral_t c)
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
Default return type of find_or_insert.
Definition: LZ78Trie.hpp:22
RollingTriePlus(Env &&env, const size_t n, const size_t &remaining_characters, factorid_t reserve=0)
RollingTriePlus & operator=(RollingTriePlus &&other)=default
len_t table_size() const
Definition: Hash.hpp:515
const size_t m_n
Definition: Hash.hpp:412
len_t empty() const
Definition: Hash.hpp:516
IF_STATS(MoveGuard m_guard;inline ~RollingTriePlus() { if(m_guard) { if(m_table2.empty()) { m_table.collect_stats(env());} else { m_table2.collect_stats(env());} } }) RollingTriePlus(RollingTriePlus &&other)=default
void incorporate(T &&o, len_t newsize)
Definition: Hash.hpp:430
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11
node_t find_or_insert(const node_t &, uliteral_t c)
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
float max_load_factor() const noexcept
Definition: Hash.hpp:409
Local environment for a compression/encoding/decompression call.
len_t entries() const
Definition: Hash.hpp:514
Interface for algorithms.
Definition: Algorithm.hpp:15
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150