tudocomp
– The TU Dortmund Compression Framework
LZSSLCPCompressor.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <algorithm>
4 #include <functional>
5 #include <vector>
6 
8 #include <tudocomp/Range.hpp>
9 #include <tudocomp/util.hpp>
10 
14 
15 #include <tudocomp/ds/TextDS.hpp>
16 
18 
19 namespace tdc {
20 
23 template<typename coder_t, typename text_t = TextDS<>>
24 class LZSSLCPCompressor : public Compressor {
25 public:
26  inline static Meta meta() {
27  Meta m("compressor", "lzss_lcp", "LZSS Factorization using LCP");
28  m.option("coder").templated<coder_t>("coder");
29  m.option("textds").templated<text_t, TextDS<>>("textds");
30  m.option("threshold").dynamic(3);
32  return m;
33  }
34 
36  inline LZSSLCPCompressor() = delete;
37 
39  inline LZSSLCPCompressor(Env&& env) : Compressor(std::move(env)) {
40  }
41 
42  inline virtual void compress(Input& input, Output& output) override {
43  auto view = input.as_view();
44  DCHECK(view.ends_with(uint8_t(0)));
45 
46  // Construct text data structures
47  text_t text = StatPhase::wrap("Construct Text DS", [&]{
48  return text_t(env().env_for_option("textds"), view,
50  });
51 
52  auto& sa = text.require_sa();
53  auto& isa = text.require_isa();
54  auto& lcp = text.require_lcp();
55 
56  // Factorize
57  const len_t text_length = text.size();
58  lzss::FactorBuffer factors;
59 
60  StatPhase::wrap("Factorize", [&]{
61  const len_t threshold = env().option("threshold").as_integer(); //factor threshold
62 
63  for(len_t i = 0; i+1 < text_length;) { // we omit T[text_length-1] since we assume that it is the \0 byte!
64  //get SA position for suffix i
65  const size_t& cur_pos = isa[i];
66  DCHECK_NE(cur_pos,0); // isa[i] == 0 <=> T[i] = 0
67 
68  //compute naively PSV
69  //search "upwards" in LCP array
70  //include current, exclude last
71  size_t psv_lcp = lcp[cur_pos];
72  ssize_t psv_pos = cur_pos - 1;
73  if (psv_lcp > 0) {
74  while (psv_pos >= 0 && sa[psv_pos] > sa[cur_pos]) {
75  psv_lcp = std::min<size_t>(psv_lcp, lcp[psv_pos--]);
76  }
77  }
78 
79  //compute naively NSV, TODO: use NSV data structure
80  //search "downwards" in LCP array
81  //exclude current, include last
82  size_t nsv_lcp = 0;
83  size_t nsv_pos = cur_pos + 1;
84  if (nsv_pos < text_length) {
85  nsv_lcp = SSIZE_MAX;
86  do {
87  nsv_lcp = std::min<size_t>(nsv_lcp, lcp[nsv_pos]);
88  if (sa[nsv_pos] < sa[cur_pos]) {
89  break;
90  }
91  } while (++nsv_pos < text_length);
92 
93  if (nsv_pos >= text_length) {
94  nsv_lcp = 0;
95  }
96  }
97 
98  //select maximum
99  const size_t& max_lcp = std::max(psv_lcp, nsv_lcp);
100  if(max_lcp >= threshold) {
101  const ssize_t& max_pos = max_lcp == psv_lcp ? psv_pos : nsv_pos;
102  DCHECK_LT(max_pos, text_length);
103  DCHECK_GE(max_pos, 0);
104  // new factor
105  factors.emplace_back(i, sa[max_pos], max_lcp);
106 
107  i += max_lcp; //advance
108  } else {
109  ++i; //advance
110  }
111  }
112 
113  StatPhase::log("threshold", threshold);
114  StatPhase::log("factors", factors.size());
115  });
116 
117  // encode
118  StatPhase::wrap("Encode", [&]{
119  typename coder_t::Encoder coder(env().env_for_option("coder"),
120  output, lzss::TextLiterals<text_t>(text, factors));
121 
122  lzss::encode_text(coder, text, factors); //TODO is this correct?
123  });
124  }
125 
126  inline virtual void decompress(Input& input, Output& output) override {
127  typename coder_t::Decoder decoder(env().env_for_option("coder"), input);
128  auto outs = output.as_stream();
129 
130  lzss::decode_text<typename coder_t::Decoder, lzss::DecodeBackBuffer>(decoder, outs);
131  }
132 };
133 
134 }
135 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
virtual void decompress(Input &input, Output &output) override
Decompress the given input to the given output.
const OptionValue & option(const std::string &option) const
Get an option of this algorithm.
Definition: Env.hpp:44
Computes the LZ77 factorization of the input using its suffix array and LCP table.
Base for data compressors.
Definition: Compressor.hpp:19
constexpr dsflags_t ISA
Definition: TextDSFlags.hpp:12
void encode_text(coder_t &coder, const text_t &text, const factor_t &factors)
Definition: LZSSCoding.hpp:19
uint64_t as_integer() const
void uses_textds(ds::dsflags_t flags)
Indicates that this Algorithm uses the TextDS class, and how it does.
Definition: Meta.hpp:277
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
OutputStream as_stream() const
Creates a stream that allows for character-wise output.
virtual void compress(Input &input, Output &output) override
Compress the given input to the given output.
LZSSLCPCompressor()=delete
Default constructor (not supported).
An abstraction layer for algorithm output.
Definition: Output.hpp:23
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
LZSSLCPCompressor(Env &&env)
Construct the class with an environment.
void emplace_back(len_t fpos, len_t fsrc, len_t flen)
Definition: LZSSFactors.hpp:41
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
InputView as_view() const
Provides a view on the input that allows for random access.
Definition: Input.hpp:260
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
constexpr dsflags_t LCP
Definition: TextDSFlags.hpp:13
Local environment for a compression/encoding/decompression call.
Manages text related data structures.
Definition: TextDS.hpp:30
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150
An abstraction layer for algorithm input.
Definition: Input.hpp:37