tudocomp
– The TU Dortmund Compression Framework
LZSSCoding.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <cassert>
4 
5 #include <tudocomp/Range.hpp>
8 //#include <tudocomp/compressors/lzss/LZSSDecodeForwardChainBuffer.hpp>
9 //#include <tudocomp/compressors/lzss/LZSSDecodeForwardMultimapBuffer.hpp>
10 //#include <tudocomp/compressors/lzss/LZSSDecodeForwardListMapBuffer.hpp>
11 //#include <tudocomp/compressors/lzss/LZSSDecodeForwardQueueListBuffer.hpp>
12 
14 
15 namespace tdc {
16 namespace lzss {
17 
18 template<typename coder_t, typename text_t, typename factor_t = FactorBuffer>
19 inline void encode_text(coder_t& coder, const text_t& text, const factor_t& factors) {
20  assert(factors.is_sorted());
21 
22  const auto n = text.size();
23 
24  // determine longest and shortest factor
25  const auto flen_min = factors.shortest_factor();
26  const auto flen_max = factors.longest_factor();
27 
28  // determine longest distance between two factors
29  size_t fdist_max = 0;
30  {
31  size_t p = 0;
32  for(auto it = factors.begin(); it != factors.end(); ++it) {
33  const Factor& factor = *it;
34  fdist_max = std::max(fdist_max, factor.pos - p);
35  p = factor.pos + factor.len;
36  }
37 
38  fdist_max = std::max(fdist_max, n - p);
39  }
40 
41  // define ranges
42  const Range text_r(n);
43  const MinDistributedRange flen_r(flen_min, flen_max);
44  const Range fdist_r(fdist_max);
45 
46  // encode ranges
47  coder.encode(n, len_r);
48  coder.encode(flen_min, text_r);
49  coder.encode(flen_max, text_r);
50  coder.encode(fdist_max, text_r);
51 
52  // walk over factors
53  size_t p = 0;
54  for(auto it = factors.begin(); it != factors.end(); ++it) {
55  const Factor& factor = *it;
56 
57  if(factor.pos == p) {
58  // cursor reached factor i, encode 0-bit
59  coder.encode(false, bit_r);
60  } else {
61  // cursor did not yet reach factor i, encode 1-bit
62  coder.encode(true, bit_r);
63 
64  // also encode amount of literals until factor i
65  DCHECK_LE(p, factor.pos);
66  DCHECK_LE(factor.pos - p, fdist_max); //distance cannot be larger than maximum distance
67  coder.encode(factor.pos - p, fdist_r);
68  }
69 
70  // encode literals until cursor reaches factor i
71  while(p < factor.pos) {
72  coder.encode(text[p++], literal_r);
73  }
74 
75  // encode factor
76  DCHECK_LT(factor.src + factor.len, n);
77  coder.encode(factor.src, text_r);
78  coder.encode(factor.len, flen_r);
79 
80  p += size_t(factor.len);
81  }
82 
83  if(p < n) {
84  coder.encode(true, bit_r);
85  coder.encode(n - p, fdist_r);
86  }
87 
88  while(p < n) {
89  // encode symbol
90  coder.encode(text[p++], literal_r);
91  }
92 }
93 
94 template<typename coder_t, typename decode_buffer_t>
95 inline void decode_text(coder_t& decoder, std::ostream& outs) {
96  // decode text range
97  auto text_len = decoder.template decode<len_t>(len_r);
98  Range text_r(text_len);
99 
100  // decode shortest and longest factor
101  auto flen_min = decoder.template decode<len_t>(text_r);
102  auto flen_max = decoder.template decode<len_t>(text_r);
103  MinDistributedRange flen_r(flen_min, flen_max);
104 
105  // decode longest distance between factors
106  auto fdist_max = decoder.template decode<len_t>(text_r);
107  Range fdist_r(fdist_max);
108 
109  // init decode buffer
110  decode_buffer_t buffer(text_len);
111 
112  // decode
113  while(!decoder.eof()) {
114  len_t num;
115 
116  auto b = decoder.template decode<bool>(bit_r);
117  if(b) num = decoder.template decode<len_t>(fdist_r);
118  else num = 0;
119 
120  // decode characters
121  while(num--) {
122  auto c = decoder.template decode<uliteral_t>(literal_r);
123  buffer.decode_literal(c);
124  }
125 
126  if(!decoder.eof()) {
127  //decode factor
128  auto src = decoder.template decode<len_t>(text_r);
129  auto len = decoder.template decode<len_t>(flen_r);
130 
131  buffer.decode_factor(src, len);
132  }
133  }
134 
135  // log stats
136  StatPhase::log("longest_chain", buffer.longest_chain());
137 
138  // write decoded text
139  buffer.write_to(outs);
140 }
141 
142 // template<typename coder_t>
143 // inline void decode_text(coder_t& decoder, std::ostream& outs, bool allow_forward = false) {
144 // if(allow_forward) {
145 // // decode_text_internal<coder_t, DecodeForwardListMapBuffer>(decoder, outs);
146 // //decode_text_internal<coder_t, SuccinctListBuffer>(decoder, outs);
147 // decode_text_internal<coder_t, DecodeForwardQueueListBuffer>(decoder, outs);
148 // //decode_text_internal<coder_t, DecodeForwardMultimapBuffer>(decoder, outs);
149 // } else {
150 // decode_text_internal<coder_t, DecodeBackBuffer>(decoder, outs);
151 // }
152 // }
153 
154 }} //ns
155 
Represents a generic range of positive integers.
Definition: Range.hpp:16
void decode_text(coder_t &decoder, std::ostream &outs)
Definition: LZSSCoding.hpp:95
len_compact_t len
Definition: LZSSFactors.hpp:15
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr auto bit_r
Global predefined range for bits (0 or 1).
Definition: Range.hpp:108
void encode_text(coder_t &coder, const text_t &text, const factor_t &factors)
Definition: LZSSCoding.hpp:19
len_compact_t len
Definition: LZSSFactors.hpp:38
len_compact_t pos
Definition: LZSSFactors.hpp:15
Represents a range of positive integers that tend to be distributed towards the minimum.
Definition: Range.hpp:56
len_compact_t src
Definition: LZSSFactors.hpp:15
constexpr auto literal_r
Global predefined reange for literals.
Definition: Range.hpp:111
len_compact_t src
Definition: LZSSFactors.hpp:38
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
constexpr auto len_r
Global predefined range for len_t.
Definition: Range.hpp:115