tudocomp
– The TU Dortmund Compression Framework
LZSSSlidingWindowCompressor.hpp
Go to the documentation of this file.
1 #pragma once
2 
4 #include <tudocomp/Literal.hpp>
5 #include <tudocomp/Range.hpp>
6 #include <tudocomp/util.hpp>
7 
9 
10 namespace tdc {
11 
14 template<typename coder_t>
16 
17 private:
18  size_t m_window;
19 
20 public:
21  inline static Meta meta() {
22  Meta m("compressor", "lzss", "Lempel-Ziv-Storer-Szymanski (Sliding Window)");
23  m.option("coder").templated<coder_t>("coder");
24  m.option("window").dynamic(16);
25  m.option("threshold").dynamic(3);
26  return m;
27  }
28 
30  inline LZSSSlidingWindowCompressor() = delete;
31 
34  {
35  m_window = this->env().option("window").as_integer();
36  }
37 
39  inline virtual void compress(Input& input, Output& output) override {
40  auto ins = input.as_stream();
41 
42  typename coder_t::Encoder coder(env().env_for_option("coder"), output, NoLiterals());
43 
44  std::vector<uint8_t> buf;
45 
46  size_t ahead = 0; //marks the index in the buffer at which the back buffer ends and the ahead buffer begins
47  char c;
48 
49  StatPhase phase("Factorize");
50 
51  //initially fill the buffer
52  size_t buf_off = 0;
53  while(buf.size() < 2 * m_window && ins.get(c)) {
54  buf.push_back(uint8_t(c));
55  }
56 
57  //factorize
58  const len_t threshold = env().option("threshold").as_integer(); //factor threshold
59  phase.log_stat("threshold", threshold);
60 
61  size_t pos = 0;
62  bool eof = false;
63  while(ahead < buf.size()) {
64  size_t fpos = 0, fsrc = 0, fnum = 0;
65 
66  //walk back buffer
67  for(size_t k = (ahead > m_window ? ahead - m_window : 0); k < ahead; k++) {
68  //compare string
69  size_t j = 0;
70  while(ahead + j < buf.size() && buf[k + j] == buf[ahead + j]) {
71  ++j;
72  }
73 
74  //factorize if longer than one already found
75  if(j >= threshold && j > fnum) {
76  fpos = buf_off + ahead;
77  fsrc = buf_off + k;
78  fnum = j;
79  }
80  }
81 
82  //output longest factor or symbol
83  size_t advance;
84 
85  if(fnum > 0) {
86  // encode factor
87  coder.encode(true, bit_r);
88  coder.encode(fpos - fsrc, Range(fpos)); //delta
89  coder.encode(fnum, Range(m_window));
90 
91  advance = fnum;
92  } else {
93  // encode literal
94  coder.encode(false, bit_r);
95  coder.encode(uliteral_t(buf[ahead]), literal_r);
96 
97  advance = 1;
98  }
99 
100  //advance buffer
101  pos += advance;
102  while(advance--) {
103  if(ahead < m_window) {
104  //case 1: still reading the first w symbols from the stream
105  ++ahead;
106  } else if(!eof && ins.get(c)) {
107  //case 2: read a new symbol
108  buf.erase(buf.begin()); //TODO ouch
109  buf.push_back(uint8_t(c));
110  ++buf_off;
111  } else {
112  //case 3: EOF, read rest of buffer
113  eof = true;
114  ++ahead;
115  }
116  }
117  }
118  }
119 
120  inline virtual void decompress(Input& input, Output& output) override {
121  typename coder_t::Decoder decoder(env().env_for_option("coder"), input);
122 
123  std::vector<uliteral_t> text;
124  while(!decoder.eof()) {
125  bool is_factor = decoder.template decode<bool>(bit_r);
126  if(is_factor) {
127  size_t fsrc = text.size() - decoder.template decode<size_t>(Range(text.size()));
128 
129  //TODO are the compressor options saved into tudocomp's magic?
130  size_t fnum = decoder.template decode<size_t>(Range(m_window));
131 
132  for(size_t i = 0; i < fnum; i++) {
133  text.push_back(text[fsrc+i]);
134  }
135  } else {
136  auto c = decoder.template decode<uliteral_t>(literal_r);
137  text.push_back(c);
138  }
139  }
140 
141  auto outs = output.as_stream();
142  for(uint8_t c : text) outs << c;
143  }
144 };
145 
146 } //ns
147 
Represents a generic range of positive integers.
Definition: Range.hpp:16
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
constexpr auto bit_r
Global predefined range for bits (0 or 1).
Definition: Range.hpp:108
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
const OptionValue & option(const std::string &option) const
Get an option of this algorithm.
Definition: Env.hpp:44
Base for data compressors.
Definition: Compressor.hpp:19
LZSSSlidingWindowCompressor()=delete
Default constructor (not supported).
InputStream as_stream() const
Creates a stream that allows for character-wise reading of the input.
Definition: Input.hpp:264
Provides access to runtime and memory measurement in statistics phases.
Definition: StatPhase.hpp:44
uint64_t as_integer() const
virtual void decompress(Input &input, Output &output) override
Decompress the given input to the given output.
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
OutputStream as_stream() const
Creates a stream that allows for character-wise output.
An empty literal iterator that yields no literals whatsoever.
Definition: Literal.hpp:37
constexpr auto literal_r
Global predefined reange for literals.
Definition: Range.hpp:111
An abstraction layer for algorithm output.
Definition: Output.hpp:23
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
len_compact_t pos
Definition: LZSSFactors.hpp:38
void log_stat(const char *key, const T &value)
Logs a user statistic for this phase.
Definition: StatPhase.hpp:298
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
LZSSSlidingWindowCompressor(Env &&e)
Construct the class with an environment.
virtual void compress(Input &input, Output &output) override
Compress the given input to the given output.
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
Local environment for a compression/encoding/decompression call.
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150
An abstraction layer for algorithm input.
Definition: Input.hpp:37
Computes the LZ77 factorization of the input by moving a sliding window over it in which redundant ph...