tudocomp
– The TU Dortmund Compression Framework
LZWDecoding.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/util.hpp>
6 
7 namespace tdc {
8 namespace lzw {
9 
11 
12 template<class F>
13 void decode_step(F next_code_callback,
14  std::ostream& out,
15  const CodeType dms,
16  const CodeType reserve_dms) {
17  std::vector<std::pair<CodeType, uliteral_t>> dictionary;
18 
19  // "named" lambda function, used to reset the dictionary to its initial contents
20  const auto reset_dictionary = [&] {
21  dictionary.clear();
22  dictionary.reserve(reserve_dms);
23 
24  const long int minc = std::numeric_limits<uliteral_t>::min();
25  const long int maxc = std::numeric_limits<uliteral_t>::max();
26 
27  for (long int c = minc; c <= maxc; ++c)
28  dictionary.push_back({dms, static_cast<uliteral_t> (c)});
29  };
30 
31  const auto rebuild_string = [&](CodeType k) -> const std::vector<uliteral_t> * {
32  static std::vector<uliteral_t> s; // String
33 
34  s.clear();
35 
36  // the length of a string cannot exceed the dictionary's number of entries
37  s.reserve(reserve_dms);
38 
39  while (k != dms)
40  {
41  s.push_back(dictionary[k].second);
42  k = dictionary[k].first;
43  }
44 
45  std::reverse(s.begin(), s.end());
46  return &s;
47  };
48 
49  reset_dictionary();
50 
51  CodeType i {dms}; // Index
52  CodeType k; // Key
53 
54  bool corrupted = false;
55 
56  while (true)
57  {
58  bool dictionary_reset = false;
59 
60  // dictionary's maximum size was reached
61  if (dictionary.size() == dms)
62  {
63  reset_dictionary();
64  dictionary_reset = true;
65  }
66 
67  if (!next_code_callback(k, dictionary_reset, corrupted))
68  break;
69 
70  //std::cout << byte_to_nice_ascii_char(k) << "\n";
71 
72  if (k > dictionary.size()) {
73  std::stringstream s;
74  s << "invalid compressed code " << k;
75  throw std::runtime_error(s.str());
76  }
77 
78  const std::vector<uliteral_t> *s; // String
79 
80  if (k == dictionary.size())
81  {
82  dictionary.push_back({i, rebuild_string(i)->front()});
83  s = rebuild_string(k);
84  }
85  else
86  {
87  s = rebuild_string(k);
88 
89  if (i != dms)
90  dictionary.push_back({i, s->front()});
91  }
92 
93  out.write((char*) &s->front(), s->size());
94  i = k;
95  }
96 
97  if (corrupted)
98  throw std::runtime_error("corrupted compressed file");
99 }
100 
101 }} //ns
102 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
void decode_step(F next_code_callback, std::ostream &out, const CodeType dms, const CodeType reserve_dms)
Definition: LZWDecoding.hpp:13
lz78::factorid_t CodeType
Definition: LZWDecoding.hpp:10
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11