tudocomp
– The TU Dortmund Compression Framework
MultiMapBuffer.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/def.hpp>
4 #include <tudocomp/Algorithm.hpp>
6 
8 
9 namespace tdc {
10 namespace lcpcomp {
11 
12 class MultimapBuffer : public Algorithm {
13  public:
14  inline static Meta meta() {
15  Meta m("lcpcomp_dec", "MultimapListBuffer");
16  m.option("lazy").dynamic(0);
17  return m;
18  }
19 
20 private:
21  std::vector<uliteral_t> m_buffer;
22  std::unordered_multimap<len_compact_t, len_compact_t> m_fwd;
23  BitVector m_decoded;
24 
25  len_t m_cursor;
26  len_t m_longest_chain;
27  len_t m_current_chain;
28 
29  //storing factors
30  std::vector<len_compact_t> m_target_pos;
31  std::vector<len_compact_t> m_source_pos;
32  std::vector<len_compact_t> m_length;
33 
34  const size_t m_lazy; // number of lazy rounds
35 
36  inline void decode_literal_at(len_t pos, uliteral_t c) {
37  ++m_current_chain;
38  m_longest_chain = std::max(m_longest_chain, m_current_chain);
39 
40  m_buffer[pos] = c;
41  m_decoded[pos] = 1;
42  // {
43 // const auto range = m_fwd.equal_range(pos);
44 // for (auto it = range.first; it != range.second; ++it) {
45 // decode_literal_at(it->second, c); // recursion
46 // }
47 // }
48 // m_fwd.erase(pos);
49  //
50  //
51  //
52  for(auto it = m_fwd.find(pos); it != m_fwd.end(); it = m_fwd.find(pos)) { //TODO: replace with equal_range!
53  decode_literal_at(it->second, c); // recursion
54  m_fwd.erase(it);
55  }
56 
57  // for(auto it = m_fwd.find(pos); it != m_fwd.end(); it = m_fwd.find(pos)) { //TODO: replace with equal_range!
58  // decode_literal_at(it->second, c); // recursion
59  // }
60  // m_fwd.erase(pos);
61 
62  --m_current_chain;
63  }
64 
65  inline void decode_lazy_() {
66  const len_t factors = m_source_pos.size();
67  for(len_t j = 0; j < factors; ++j) {
68  const len_compact_t& target_position = m_target_pos[j];
69  const len_compact_t& source_position = m_source_pos[j];
70  const len_compact_t& factor_length = m_length[j];
71  for(len_t i = 0; i < factor_length; ++i) {
72  if(m_decoded[source_position+i]) {
73  m_buffer[target_position+i] = m_buffer[source_position+i];
74  m_decoded[target_position+i] = 1;
75  }
76  }
77  }
78  }
79 
80 public:
81  inline MultimapBuffer(Env&& env, len_t size)
82  : Algorithm(std::move(env)), m_cursor(0), m_longest_chain(0), m_current_chain(0), m_lazy(this->env().option("lazy").as_integer())
83  {
84  m_fwd.max_load_factor(0.8);
85  m_buffer.resize(size, 0);
86  m_decoded = BitVector(size, 0);
87  }
88 
89  inline void decode_literal(uliteral_t c) {
90  decode_literal_at(m_cursor++, c);
91  }
92 
93  inline void decode_factor(const len_t source_position, const len_t factor_length) {
94  bool factor_stored = false;
95  for(len_t i = 0; i < factor_length; ++i) {
96  const len_t src_pos = source_position+i;
97  if(m_decoded[src_pos]) {
98  m_buffer[m_cursor] = m_buffer[src_pos];
99  m_decoded[m_cursor] = 1;
100  }
101  else if(factor_stored == false) {
102  factor_stored = true;
103  m_target_pos.push_back(m_cursor);
104  m_source_pos.push_back(src_pos);
105  m_length.push_back(factor_length-i);
106  }
107  ++m_cursor;
108  }
109  }
110 
111  inline void decode_lazy() {
112  size_t lazy = m_lazy;
113  while(lazy > 0) {
114  decode_lazy_();
115  --lazy;
116  }
117  }
118 
119  inline void decode_eagerly() {
120  const len_t factors = m_source_pos.size();
121  StatPhase phase("Decoding Factors");
122  phase.log_stat("factors", factors);
123 
124  IF_STATS(size_t max_size = 0);
125 
126  for(len_t j = 0; j < factors; ++j) {
127  const len_compact_t& target_position = m_target_pos[j];
128  const len_compact_t& source_position = m_source_pos[j];
129  const len_compact_t& factor_length = m_length[j];
130  for(len_t i = 0; i < factor_length; ++i) {
131  if(m_decoded[source_position+i]) {
132  decode_literal_at(target_position+i, m_buffer[source_position+i]);
133  } else {
134  m_fwd.emplace(source_position+i, target_position+i);
135  }
136  }
137  IF_STATS({
138  max_size = std::max(max_size, m_fwd.bucket_count());
139  if((j+1) % ((factors+5)/5) == 0 ) {
140  phase.log_stat("hash table size", m_fwd.bucket_count());
141  phase.log_stat("hash table entries", m_fwd.size());
142 
143  phase.split(
144  std::string("Decoding Factors at position ")
145  + std::to_string(target_position));
146  }
147  })
148  }
149  IF_STATS(phase.log_stat("hash table max size", max_size));
150  }
151 
152  inline len_t longest_chain() const {
153  return m_longest_chain;
154  }
155 
156  inline void write_to(std::ostream& out) {
157  for(auto c : m_buffer) out << c;
158  }
159 };
160 
161 
162 }}//ns
163 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
void push_back(const value_type &val)
Definition: IntVector.hpp:426
IntVector< uint_t< 1 > > BitVector
Represents a bit vector, alias for IntVector with a fixed bit width of 1.
Definition: IntVector.hpp:545
std::string to_string(tdc::uint_impl_t< N > value)
Definition: uint_t.hpp:241
MultimapBuffer(Env &&env, len_t size)
Provides access to runtime and memory measurement in statistics phases.
Definition: StatPhase.hpp:44
void split(const char *new_title)
Starts a new phase as a sibling, reusing the same object.
Definition: StatPhase.hpp:264
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
uint32_t len_compact_t
Type to represent an bit-compact length value.
Definition: def.hpp:103
void decode_factor(const len_t source_position, const len_t factor_length)
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
len_compact_t pos
Definition: LZSSFactors.hpp:38
#define IF_STATS(x)
x is compiled only when the STATS_DISABLED macro is undefined.
Definition: def.hpp:59
void decode_literal(uliteral_t c)
void log_stat(const char *key, const T &value)
Logs a user statistic for this phase.
Definition: StatPhase.hpp:298
void write_to(std::ostream &out)
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
Local environment for a compression/encoding/decompression call.
Interface for algorithms.
Definition: Algorithm.hpp:15
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150