7 namespace tdc {
namespace esp {
// Table type indexed by literal value (see count_alphabet_literals, which
// indexes it with the input values themselves).
12 using CountMap = std::vector<size_t>;
// Encoder state. The enclosing class header is not part of this listing;
// presumably these belong to ArithmeticEncoder - TODO confirm.
// Written to the stream by writeCodebook(); where it is computed is not shown.
15 size_t m_codebook_size = 0;
// Set by build_intervals() from the prefix-summed count map.
16 size_t m_literal_count = 0;
// Smallest interval width that may still be subdivided; setNewBounds() emits a
// code and restarts once the current range drops below this value.
17 size_t m_min_range = std::numeric_limits<size_t>::max();
// Compared against m_literal_count further down in the file.
18 size_t m_literal_counter = 0;
// Current coding interval bounds, narrowed by setNewBounds().
20 size_t m_lower_bound = 0;
21 size_t m_upper_bound = std::numeric_limits<size_t>::max();
// Output stream that receives the codebook and the codes via write_int.
23 std::shared_ptr<BitOStream> m_out;
// Builds the CountMap for `input`. Only part of the body appears in this
// listing: the declarations of `max_value`, `ret` and `overrite`, any update of
// ret[val], and the return statement are on lines that are not shown.
31 CountMap count_alphabet_literals(
const T& input) {
// Pass 1: find the largest value occurring in `input`.
33 for (
size_t i = 0; i < input.size(); i++) {
34 max_value = std::max(max_value, input[i]);
// Size the table to max(max_value + 1, overrite) entries, so every value seen
// in pass 1 is a valid index. (resize() alone would also allocate; the
// preceding reserve() is redundant.)
38 ret.reserve(std::max(max_value + 1, overrite));
39 ret.resize(std::max(max_value + 1, overrite));
// Pass 2: visit each input value again and use it as an index into `ret`.
41 for (
size_t i = 0; i < input.size(); i++) {
42 size_t val = input[i];
// Debug check that this slot is still below the size_t maximum; presumably
// guards an increment of ret[val] on a following line - TODO confirm.
43 DCHECK_LT(ret[val], std::numeric_limits<size_t>::max());
// Rewrites the count map `c` in place and derives m_literal_count and
// m_min_range from it. Several lines of the body (including all closing braces
// and the second loop's body) are not shown in this listing.
57 void build_intervals(CountMap& c) {
// Running minimum over the entries of c, starting from the largest size_t.
61 size_t min = std::numeric_limits<size_t>::max();
63 for(
size_t i = 1; i < c.size(); i++) {
// The minimum is taken over c[i] before the prefix sum below is applied; a
// condition on an intervening line may restrict which entries count.
66 min = std::min(min, c[i]);
// Turn the entries into running totals: c[i] becomes c[0] + ... + c[i].
68 c[i] = c[i] + c[i - 1];
// NOTE(review): this reads the second-to-last entry, while setNewBounds() uses
// m_count_map[m_count_map.size() - 1] as its total - confirm both agree.
// c.size() - 2 wraps around when c has fewer than two entries.
70 m_literal_count = c[c.size() - 2];
73 for(
size_t i = 1; i < c.size(); i++) {
// Minimum range is set to the same second-to-last entry; how `min` feeds into
// this is not visible here.
76 m_min_range = c[c.size() - 2];
// Narrows the current interval [m_lower_bound, m_upper_bound] to the part
// that m_count_map assigns to index `v`. Some lines of the body are not shown
// in this listing.
80 inline void setNewBounds(
size_t v) {
81 size_t range = m_upper_bound - m_lower_bound;
// Interval too narrow to subdivide further: emit the lower bound as a code
// and start over with a fresh interval.
83 if(range < m_min_range) {
84 writeCode(m_lower_bound);
// Upper bound is reset to the size_t maximum; the matching reset of
// m_lower_bound is presumably on the line not shown before this one.
86 m_upper_bound = std::numeric_limits<size_t>::max();
87 range = m_upper_bound - m_lower_bound;
89 DCHECK_NE(m_lower_bound, m_upper_bound);
// Total of the cumulative table: its last entry.
91 const size_t literal_count = m_count_map[m_count_map.size() - 1];
// Scale the cumulative value of v into the current range. For small ranges
// the multiplication is done first, for large ranges the division is done
// first - presumably to keep range * count from overflowing size_t.
94 const size_t offset_upper = range <= literal_count
95 ? (range * m_count_map[v]) / literal_count
96 : (range / literal_count) * m_count_map[v];
98 m_upper_bound = m_lower_bound + offset_upper;
// Same scaling for the predecessor entry gives the new lower bound.
// NOTE(review): v - 1 wraps around for v == 0; a guard may exist on a line
// that is not shown here - confirm.
101 const size_t offset_lower = range <= literal_count
102 ? (range * m_count_map[v - 1]) / literal_count
103 : (range / literal_count) * m_count_map[v - 1];
104 m_lower_bound = m_lower_bound + offset_lower;
// Writes the codebook to m_out: first m_literal_count, then m_codebook_size,
// then one (index, cumulative value) pair for every index at which
// m_count_map changes. Every write is echoed to std::cout together with the
// current source line number (__LINE__) as a debug trace.
// Lines between the header and the first write are not shown in this listing.
108 inline void writeCodebook() {
119 m_out->write_int<
size_t>(m_literal_count);
120 std::cout<<
" " << __LINE__ <<
" write " << size_t(m_literal_count) <<
" \n";
123 m_out->write_int<
size_t>(m_codebook_size);
124 std::cout<<
" " << __LINE__ <<
" write " << size_t(m_codebook_size) <<
" \n";
// Index 0 has no predecessor to compare against, so it is emitted whenever
// its entry is non-zero.
126 if(m_count_map[0] != 0) {
127 m_out->write_int<
size_t>(0);
128 std::cout<<
" " << __LINE__ <<
" write " << size_t(0) <<
" \n";
129 m_out->write_int<
size_t>(m_count_map[0]);
130 std::cout<<
" " << __LINE__ <<
" write " << size_t(m_count_map[0]) <<
" \n";
// Remaining indices: emit a pair only where the entry differs from the
// previous one.
132 for(
size_t i = 1; i < m_count_map.size(); i++) {
133 if(m_count_map[i] != m_count_map[i-1]) {
134 m_out->write_int<
size_t>(i);
135 std::cout<<
" " << __LINE__ <<
" write " << size_t(i) <<
" \n";
136 m_out->write_int<
size_t>(m_count_map[i]);
137 std::cout<<
" " << __LINE__ <<
" write " << size_t(m_count_map[i]) <<
" \n";
// Writes a single code word to m_out as a size_t and echoes it to std::cout
// with the current source line number as a debug trace.
142 inline void writeCode(
size_t code) {
143 m_out->write_int<
size_t>(code);
144 std::cout<<
" " << __LINE__ <<
" write " << size_t(code) <<
" \n";
// Finishes the output: emits the pending lower bound as a last code word and
// then writes the size_t maximum. The decoding code further down in this file
// compares every code it reads against that same maximum value.
148 void postProcessing() {
149 writeCode(m_lower_bound);
152 m_out->write_int<
size_t>(std::numeric_limits<size_t>::max());
// Debug trace of the value written above.
153 std::cout<<
" " << __LINE__ <<
" write " << size_t(std::numeric_limits<size_t>::max()) <<
" \n";
// Constructor fragment, templated on the literal container type. The line
// carrying the constructor name and its first parameter is not shown in this
// listing; presumably this is the ArithmeticEncoder constructor - TODO confirm.
157 template<
typename input_t>
159 const input_t& literals):
// Build the count map from the literals in the initializer list ...
161 m_count_map(count_alphabet_literals(literals))
// ... then let build_intervals() rewrite it in place and set the derived
// members.
163 build_intervals(m_count_map);
171 if(m_literal_counter == m_literal_count) {
// Decoder state. The enclosing class header is not part of this listing;
// presumably these belong to ArithmeticDecoder - TODO confirm.
// Input stream that the codebook and the codes are read from via read_int.
180 std::shared_ptr<BitIStream> m_in;
// Both counts are read from the stream before the codebook entries.
182 size_t m_codebook_size = 0;
183 size_t m_literal_count = 0;
// Codebook entries as pairs, stored in the order they are read from the
// stream.
184 std::vector<std::pair<size_t, size_t>> m_literals;
// Set to the second component of the last codebook entry once it is read.
185 size_t m_min_range = std::numeric_limits<size_t>::max();
// Index of the next value to hand out from m_decoded.
186 size_t m_literals_read = 0;
// Bounded by m_literal_count in decode(size_t).
187 size_t m_literal_counter = 0;
// Buffer of decoded values that are consumed one at a time.
189 std::vector<size_t> m_decoded;
// Decodes one code word by repeatedly locating the codebook entry whose
// sub-interval contains `code`, collecting the matched literals in `os`.
// Several lines (closing braces, any break out of the scan, the update of
// m_literal_counter and what happens to `os` afterwards) are not shown in this
// listing.
191 void decode(
size_t code) {
// Start from the full interval.
192 size_t lower_bound = 0;
193 size_t upper_bound = std::numeric_limits<size_t>::max();
194 std::vector<size_t> os;
// Total used for scaling: second component of the last codebook entry.
195 size_t interval_parts = m_literals[m_codebook_size - 1].second;
198 size_t range = upper_bound - lower_bound;
// Continue while the interval is still at least m_min_range wide and not all
// literals have been produced.
201 while(m_min_range <= range && m_literal_counter < m_literal_count) {
// Lower edge of the candidate sub-interval being examined.
202 size_t interval_lower_bound = lower_bound;
205 for(
size_t i = 0; i < m_codebook_size ; i++) {
206 const std::pair<size_t, size_t>& pair = m_literals[i];
// Scale the entry's second component into the current range: multiply first
// for small ranges, divide first for large ones (same scheme as
// setNewBounds()).
208 const size_t offset = range <= interval_parts
209 ? range * pair.second / interval_parts
210 : range / interval_parts * pair.second;
212 upper_bound = lower_bound + offset;
// Debug trace of the candidate upper bound.
214 std::cout <<
"i: " << i <<
", code: " << code <<
", upper_bound: " << upper_bound <<
"\n";
// Code lies below this candidate's upper bound: record the entry's first
// component and narrow the interval to this candidate.
216 if(code < upper_bound) {
218 os.push_back(pair.first);
219 lower_bound = interval_lower_bound;
// Otherwise the next candidate starts where this one ended.
222 interval_lower_bound = upper_bound;
225 range = upper_bound - lower_bound;
// Reads the codebook from m_in. The header of the enclosing function is not
// shown in this listing (presumably the ArithmeticDecoder constructor - TODO
// confirm). The read order matches what writeCodebook() writes: literal
// count, codebook size, then the pairs. Every read is echoed to std::cout
// with the current source line number as a debug trace.
240 m_literal_count = m_in->read_int<
size_t>();
241 std::cout<<
" " << __LINE__ <<
" read " << size_t(m_literal_count) <<
" \n";
242 m_codebook_size = m_in->read_int<
size_t>();
243 std::cout<<
" " << __LINE__ <<
" read " << size_t(m_codebook_size) <<
" \n";
// Make room for all entries (resize() alone would also allocate; the
// preceding reserve() is redundant).
245 m_literals.reserve(m_codebook_size);
246 m_literals.resize(m_codebook_size);
// Read m_codebook_size pairs, two size_t values each.
249 for (
size_t i = 0; i < m_codebook_size; i++) {
250 size_t c = m_in->read_int<
size_t>();
251 std::cout<<
" " << __LINE__ <<
" read " << size_t(c) <<
" \n";
252 size_t val = m_in->read_int<
size_t>();
253 std::cout<<
" " << __LINE__ <<
" read " << size_t(val) <<
" \n";
254 m_literals[i] = std::pair<size_t, size_t>(c, val);
// Minimum range is the second component of the last entry.
// NOTE(review): m_codebook_size - 1 wraps around if the stream reports an
// empty codebook - confirm that cannot happen.
257 m_min_range = m_literals[m_codebook_size - 1].second;
// Hands out the next decoded value, reading further code words from m_in when
// the buffer is exhausted. The header of the enclosing function and the bodies
// of the "cond 2" / "cond 4" branches are not shown in this listing; they
// presumably call decode(code) to refill m_decoded - TODO confirm.
// Nothing decoded yet: read the first code word.
262 if(!m_decoded.size()) {
263 std::cout<<
" " << __LINE__ <<
" cond 1 " <<
" \n";
264 size_t code = m_in->read_int<
size_t>();
265 std::cout<<
" " << __LINE__ <<
" read " << size_t(code) <<
" \n";
// The size_t maximum is the value postProcessing() writes after the last
// code; any other value is treated as a real code word.
267 if(code != std::numeric_limits<size_t>::max()) {
268 std::cout<<
" " << __LINE__ <<
" cond 2 " <<
" \n";
// Take the next buffered value and advance the read index.
// NOTE(review): if m_decoded is still empty at this point the access is out
// of bounds, unless lines that are not shown prevent it - confirm.
274 size_t val = m_decoded[m_literals_read++];
275 std::cout <<
"val: " << size_t(val) <<
", lread: " << m_literals_read <<
"\n";
// Buffer fully consumed: read the next code word ahead of the next call.
278 if(m_literals_read == m_decoded.size()) {
279 std::cout<<
" " << __LINE__ <<
" cond 3 " <<
" \n";
280 size_t code = m_in->read_int<
size_t>();
281 std::cout<<
" " << __LINE__ <<
" read " << size_t(code) <<
" \n";
283 if(code != std::numeric_limits<size_t>::max()) {
284 std::cout<<
" " << __LINE__ <<
" cond 4 " <<
" \n";
Contains the text compression and encoding framework.
Encodes data to an ASCII character stream.
std::string vec_to_debug_string(const T &s, size_t indent=0)
Builds the string representation of a vector of byte values, surrounded by square brackets ([ and ])...
Decodes data from an Arithmetic character stream.
ArithmeticEncoder(const std::shared_ptr< BitOStream > &out, const input_t &literals)
ArithmeticDecoder(const std::shared_ptr< BitIStream > &in)