tudocomp
– The TU Dortmund Compression Framework
EspContextImpl.hpp
Go to the documentation of this file.
1 #pragma once
2 
4 
8 
10 
11 namespace tdc {namespace esp {
12  template<typename ipd_t>
13  template<typename T>
15  size_t root_node = 0;
16  bool empty = false;
17 
18  SLP slp;
19  size_t slp_counter = 256;
20  size_t prev_slp_counter = 0;
21 
22  std::unique_ptr<Round<ipd_t>> round;
23 
24  // Initialize initial round
25  {
26  auto phase = with_env([&](auto& env) {
27  return StatPhase("Prepare round 0");
28  });
29 
30  round = std::make_unique<Round<ipd_t>>(Round<ipd_t> {
32  256, // TODO: Calc actual alphabet size
34  });
35  round->string.width(bits_for(256 - 1));
36  round->string.reserve(input.size(), bits_for(256 - 1));
37  for (auto c : input) {
38  round->string.push_back(c);
39  }
40  auto discard = std::move(input);
41  }
42 
43  for(size_t n = 0;; n++) {
44  auto phase = with_env([&](auto& env) {
45  std::stringstream ss;
46  ss << "Round " << n;
47  return StatPhase(ss.str());
48  });
49 
50  auto& r = *round;
51  in_t in = r.string;
52 
54  r.alphabet,
55  in,
56  behavior_metablocks_maximimze_repeating,
57  behavior_landmarks_tie_to_right,
58  debug.round(),
59  };
60 
61  ctx.debug.init(n, in, r.alphabet);
62 
63  if (in.size() == 0) {
64  empty = true;
65  ctx.debug.last_round(0, true);
66  break;
67  }
68  if (in.size() == 1) {
69  root_node = in[0] + prev_slp_counter;
70  ctx.debug.last_round(root_node, false);
71  break;
72  }
73 
74  IntVector<dynamic_t> new_layer;
75  size_t new_layer_width = bits_for(in.size() - 1);
76  new_layer.width(new_layer_width);
77  new_layer.reserve(in.size() / 2 + 1, new_layer_width);
78 
79  ctx.split(in);
80 
81  const auto& v = ctx.adjusted_blocks();
82 
83  ctx.debug.slice_symbol_map_start();
84  {
85  in_t s = in;
86  for (auto e : v) {
87  auto slice = s.slice(0, e.len);
88  s = s.slice(e.len);
89  auto rule_name = r.gr.add(slice) - (r.gr.initial_counter() - 1);
90 
91  ctx.debug.slice_symbol_map(slice, rule_name);
92 
93  auto old_cap = new_layer.capacity();
94  new_layer.push_back(rule_name);
95  auto new_cap = new_layer.capacity();
96  DCHECK_EQ(old_cap, new_cap);
97  }
98  }
99 
100  // Delete previous string
101  r.string = IntVector<dynamic_t>();
102 
103  DCHECK_EQ(r.string.size(), 0);
104  DCHECK_EQ(r.string.capacity(), 0);
105 
106  new_layer.shrink_to_fit();
107 
108  // Append to slp array
109  {
110  size_t old_slp_size = slp.rules.size();
111  size_t additional_slp_size = r.gr.rules_count();
112  size_t new_slp_size = old_slp_size + additional_slp_size;
113 
114  slp.rules.reserve(new_slp_size);
115  slp.rules.resize(new_slp_size);
116 
117  auto& rv = slp.rules;
118 
119  r.gr.for_all([&](const auto& k, const auto& val_) {
120  const auto& val = val_ - r.gr.initial_counter();
121  const auto& key = k.as_view();
122 
123  size_t store_idx = slp_counter + val - 256;
124  rv[store_idx][0] = key[0] + prev_slp_counter;
125  rv[store_idx][1] = key[1] + prev_slp_counter;
126  });
127 
128  prev_slp_counter = slp_counter;
129  slp_counter += additional_slp_size;
130  }
131 
132  // carry over stats
133  auto round_ipd_stats = r.gr.stats();
134  ipd_stats.ext_size2_total += round_ipd_stats.ext_size2_total;
135  ipd_stats.ext_size3_total += round_ipd_stats.ext_size3_total;
136  ipd_stats.ext_size3_unique += round_ipd_stats.ext_size3_unique;
137  ipd_stats.int_size2_total += round_ipd_stats.int_size2_total;
138  ipd_stats.int_size2_unique += round_ipd_stats.int_size2_unique;
139 
140  // Delete previous hashmap
141  r.gr.clear();
142 
143  // Prepare next round
144  auto tmp = Round<ipd_t> {
145  GrammarRules<ipd_t>(r.gr.rules_count()),
146  r.gr.rules_count(),
147  std::move(new_layer),
148  };
149 
150  round.reset();
151  round = std::make_unique<Round<ipd_t>>(std::move(tmp));
152 
153  phase.log_stat("SLP size", slp.rules.size());
154  phase.log_stat("ext_size2_total", round_ipd_stats.ext_size2_total);
155  phase.log_stat("ext_size3_total", round_ipd_stats.ext_size3_total);
156  phase.log_stat("ext_size3_unique", round_ipd_stats.ext_size3_unique);
157  phase.log_stat("int_size2_total", round_ipd_stats.int_size2_total);
158  phase.log_stat("int_size2_unique", round_ipd_stats.int_size2_unique);
159  }
160 
161  slp.empty = empty;
162  slp.root_rule = root_node;
163 
164  return slp;
165  }
166 }}
BitPackingVectorSlice< dynamic_t > in_t
Definition: HashArray.hpp:9
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
size_t rules_count() const
std::vector< std::array< size_t, 2 > > rules
Definition: SLP.hpp:13
size_t root_rule
Definition: SLP.hpp:14
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
void push_back(const value_type &val)
Definition: IntVector.hpp:426
Provides access to runtime and memory measurement in statistics phases.
Definition: StatPhase.hpp:44
void init(size_t number, const X &string, size_t alphabet_size)
DebugRoundContext debug
bool empty
Definition: SLP.hpp:15
size_type capacity() const
Definition: IntVector.hpp:345
void reserve(size_type n)
Definition: IntVector.hpp:357