tudocomp
– The TU Dortmund Compression Framework
SortedSLPCoder.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/Algorithm.hpp>
7 
8 namespace tdc {namespace esp {
9  template<typename d_coding_t = DMonotonSubseq<>>
10  class SortedSLPCoder: public Algorithm {
11  using RhsAdapter = SLPRhsAdapter;
12  public:
13  inline static Meta meta() {
14  Meta m("slp_coder", "sorted");
15  m.option("d_coding").templated<d_coding_t, DMonotonSubseq<>>("d_coding");
16  m.option("dump_json").dynamic("false");
17  m.option("dump_json_file").dynamic("-");
18  return m;
19  };
20 
22 
23  inline void encode(DebugContext& debug, SLP&& slp, Output& output) const {
24  /*
25  std::cout << "unsorted:\n";
26  for (size_t i = 0; i < slp.rules.size(); i++) {
27  std::cout
28  << i << ": "
29  << i + esp::GRAMMAR_PD_ELLIDED_PREFIX
30  << " -> (" << slp.rules[i][0] << ", " << slp.rules[i][1] << ")\n";
31  }
32  */
33 
34  auto phase = StatPhase("SLP sort");
35  slp_dep_sort(slp); // can be implemented better, and in a way that yields
36  // temporary lists for reusal
37 
38  if (env().option("dump_json").as_bool()) {
39  phase.split("Dump JSON");
40  std::ofstream ostream(env().option("dump_json_file").as_string() + ".json");
41 
42  std::vector<size_t> dl;
43  std::vector<size_t> dr;
44  for (auto& e : slp.rules) {
45  dl.push_back(e[0]);
46  dr.push_back(e[1]);
47  }
48  ostream << "{\n";
49  ostream << " \"DL\" : " << vec_to_debug_string(dl) << "\n,";
50  ostream << ",\n";
51  ostream << " \"DR\" : " << vec_to_debug_string(dr) << "\n";
52  ostream << "}\n";
53 
54  ostream.flush();
55  ostream.close();
56  }
57 
58  phase.split("Encode headers");
59 
60  auto max_val = slp.rules.size() + esp::GRAMMAR_PD_ELLIDED_PREFIX - 1;
61  auto bit_width = bits_for(max_val);
62 
63  auto bouts = std::make_shared<BitOStream>(output.as_stream());
64  auto& bout = *bouts;
65 
66  DCHECK_GE(bit_width, 1);
67  DCHECK_LE(bit_width, 63); // 64-bit sizes are
68  // restricted to 63 or less in practice
69 
70  if (slp.empty) {
71  bit_width = 0;
72  DCHECK(slp.rules.empty());
73  DCHECK_EQ(slp.root_rule, 0);
74  }
75 
76  // bit width
77  bout.write_int(bit_width, 6);
78 
79  // size
80  bout.write_int(max_val, bit_width);
81 
82  // root rule
83  bout.write_int(slp.root_rule, bit_width);
84 
85  if (slp.empty || slp.root_rule < 256) {
86  return;
87  }
88 
89  /*
90  std::cout << "sorted:\n";
91  for (size_t i = 0; i < slp.rules.size(); i++) {
92  std::cout
93  << i << ": "
94  << i + esp::GRAMMAR_PD_ELLIDED_PREFIX
95  << " -> (" << slp.rules[i][0] << ", " << slp.rules[i][1] << ")\n";
96  }*/
97 
98  // ...
99  //std::vector<size_t> rules_lhs;
100  //std::vector<size_t> rules_lhs_diff;
101  //rules_lhs.push_back(e[0]);
102  //rules_lhs_diff.push_back(diff);
103  //std::cout << "emitted lhs: " << vec_to_debug_string(rules_lhs) << "\n";
104  //std::cout << "emitted diffs: " << vec_to_debug_string(rules_lhs_diff) << "\n";
105 
106  phase.split("Encode SLP LHS");
107 
108  // Write rules lhs
109  {
110  size_t last = 0;
111  for (auto& e : slp.rules) {
112  DCHECK_LE(last, e[0]);
113  size_t diff = e[0] - last;
114  bout.write_unary(diff);
115  last = e[0];
116  }
117  }
118 
119  phase.split("Encode SLP RHS");
120 
121  const auto rhs = RhsAdapter { &slp };
122  d_coding_t d_coding { this->env().env_for_option("d_coding") };
123  d_coding.encode(rhs, bouts, bit_width, max_val);
124  }
125 
126  inline SLP decode(Input& input) const {
127  auto bins = std::make_shared<BitIStream>(input.as_stream());
128  auto& bin = *bins;
129 
130  // bit width
131  auto bit_width = bin.read_int<size_t>(6);
132  bool empty = (bit_width == 0);
133 
134  // size
135  auto max_val = bin.read_int<size_t>(bit_width);
136 
137  // root rule
138  auto root_rule = bin.read_int<size_t>(bit_width);
139 
140  //std::cout << "in: Root rule: " << root_rule << "\n";
141  //std::vector<size_t> rules_lhs_diff;
142  //rules_lhs_diff.push_back(diff);
143  //std::cout << "parsed lhs: " << vec_to_debug_string(rules_lhs) << "\n";
144  //std::cout << "parsed diffs: " << vec_to_debug_string(rules_lhs_diff) << "\n";
145 
146  esp::SLP slp;
147  slp.empty = empty;
148  slp.root_rule = root_rule;
149 
150  if (empty || slp.root_rule < 256) {
151  return slp;
152  }
153 
154  size_t slp_size = (max_val + 1) - 256; // implied initial bytes
155  slp.rules.reserve(slp_size);
156  slp.rules.resize(slp_size);
157 
158  // Read rules lhs
159  {
160  size_t last = 0;
161  for(size_t i = 0; i < slp_size && !bin.eof(); i++) {
162  // ...
163  auto diff = bin.read_unary<size_t>();
164  last += diff;
165  slp.rules[i][0] = last;
166  }
167  }
168 
169  auto D = RhsAdapter { &slp };
170  d_coding_t d_coding { this->env().env_for_option("d_coding") };
171  d_coding.decode(D, bins, bit_width, max_val);
172 
173  return slp;
174  }
175  };
176 }}
void encode(DebugContext &debug, SLP &&slp, Output &output) const
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
size_t GRAMMAR_PD_ELLIDED_PREFIX
Definition: SLP.hpp:10
std::string vec_to_debug_string(const T &s, size_t indent=0)
Builds the string representation of a vector of byte values, sorrounded by square brackets ([ and ])...
Env env_for_option(const std::string &option) const
Create the environment for a sub algorithm option.
Definition: Env.hpp:34
InputStream as_stream() const
Creates a stream that allows for character-wise reading of the input.
Definition: Input.hpp:264
Provides access to runtime and memory measurement in statistics phases.
Definition: StatPhase.hpp:44
void slp_dep_sort(SLP &slp)
Definition: SLPDepSort.hpp:7
Algorithm(Algorithm const &)=default
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
OutputStream as_stream() const
Creates a stream that allows for character-wise output.
An abstraction layer for algorithm output.
Definition: Output.hpp:23
bool empty
Definition: SLP.hpp:15
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
SLP decode(Input &input) const
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
Interface for algorithms.
Definition: Algorithm.hpp:15
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150
An abstraction layer for algorithm input.
Definition: Input.hpp:37