10 namespace tdc {
namespace esp {
19 using Numl = std::vector<size_t>;
25 map_from_effective.reserve(alphabet_size);
26 map_from_effective.resize(alphabet_size);
29 for(
size_t i = 0; i < counts.size(); ++i) {
30 if(counts[i] == 0)
continue;
31 DCHECK_LT(j, alphabet_size);
32 map_from_effective[j++] = i;
34 DCHECK_EQ(j, alphabet_size);
35 for(
size_t i = 0; i < alphabet_size; ++i) {
36 DCHECK_NE(counts[map_from_effective[i]], 0);
38 return map_from_effective;
43 size_t alphabet_size) {
44 std::vector<size_t> A;
45 A.reserve(alphabet_size * 2);
46 A.resize(alphabet_size * 2);
48 for(
size_t i = 0; i < alphabet_size; i++) {
50 <<
"Char " << map_from_effective[i]
51 <<
" : " << size_t(counts[map_from_effective[i]])
53 A[alphabet_size + i] = counts[map_from_effective[i]];
54 A[i] = alphabet_size + i;
58 auto comp = [&A] (
const size_t a,
const size_t b) ->
bool {
61 std::make_heap(&A[0], &A[alphabet_size], comp);
62 DCHECK_LE(A[A[0]], A[A[1]]);
66 size_t h = alphabet_size - 1;
70 std::pop_heap(A.begin(), A.begin() + h + 1, comp);
71 const size_t m1 = A[h];
74 std::pop_heap(A.begin(), A.begin() + h + 1, comp);
75 const size_t m2 = A[h];
79 A[h + 1] = A[m1] + A[m2];
81 A[m1] = A[m2] = h + 1;
82 std::push_heap(A.begin(), A.begin() + h + 1, comp);
86 for(
size_t i = 2; i < 2 * alphabet_size; ++i) {
91 codelengths.reserve(alphabet_size);
92 codelengths.resize(alphabet_size);
93 for (
size_t i = 0; i < alphabet_size; i++) {
94 DCHECK_LE(A[alphabet_size + i], 64);
95 codelengths[i] = A[alphabet_size+i];
97 <<
"Char " << map_from_effective[i]
98 <<
" : " << codelengths[i]
106 inline static Numl gen_numl(
const OrderedCodelengths& ordered_codelengths,
const size_t alphabet_size,
const uint8_t longest) {
107 DCHECK_EQ(longest, *std::max_element(ordered_codelengths.begin(),
108 ordered_codelengths.end()));
109 DCHECK_GT(longest, 0);
113 numl.reserve(longest);
114 numl.resize(longest);
116 for (
size_t i = 0; i < alphabet_size; ++i) {
117 DCHECK_LE(ordered_codelengths[i], longest);
118 DCHECK_GT(ordered_codelengths[i], 0);
119 ++numl[ordered_codelengths[i] - 1];
125 inline static Firstcode gen_first_codes(
const Numl& numl,
const size_t longest) {
127 firstcode.reserve(longest);
128 firstcode.resize(longest);
130 firstcode[longest - 1] = 0;
131 for(
size_t i = longest-1; i > 0; --i) {
132 firstcode[i - 1] = (firstcode[i] + numl[i]) / 2;
138 const size_t alphabet_size,
140 const uint8_t longest) {
141 DCHECK_EQ(longest, *std::max_element(ordered_codelengths.begin(),
142 ordered_codelengths.end()));
143 DCHECK_GT(longest,0);
145 auto firstcode = gen_first_codes(numl, longest);
148 codewords.reserve(alphabet_size);
149 codewords.resize(alphabet_size);
150 for(
size_t i = 0; i < alphabet_size; ++i) {
151 DCHECK_LE(ordered_codelengths[i], longest);
152 DCHECK_GT(ordered_codelengths[i], 0);
153 codewords[i] = firstcode[ordered_codelengths[i]-1]++;
154 DVLOG(2) <<
"codeword " << i <<
" : " << std::bitset<64>(codewords[i])
155 <<
", length " << ordered_codelengths[i]
163 const size_t alphabet_size,
164 const size_t real_alphabet_size)
167 size_t max = real_alphabet_size;
171 map_to_effective.reserve(max);
172 map_to_effective.resize(max,
size_t(-1));
174 for(
size_t i = 0; i < alphabet_size; ++i) {
175 map_to_effective[ordered_map_from_effective[i]] = i;
178 DVLOG(2) <<
"ordered_map_from_effective : " 181 DVLOG(2) <<
"map_to_effective : " 185 return map_to_effective;
202 DCHECK_GT(alphabet_size, 0);
209 auto map_from_effective = gen_effective_alphabet(counts, alphabet_size);
210 auto codelengths = gen_codelengths(std::move(counts),
213 std::vector<size_t> codeword_order;
214 codeword_order.reserve(alphabet_size);
215 codeword_order.resize(alphabet_size);
216 std::iota(codeword_order.begin(), codeword_order.end(), 0);
217 std::sort(codeword_order.begin(), codeword_order.end(),
218 [&] (
size_t i,
size_t j) ->
bool {
219 return codelengths[i] < codelengths[j];
221 longest = *std::max_element(codelengths.begin(),
224 ordered_codelengths.reserve(alphabet_size);
225 ordered_codelengths.resize(alphabet_size);
227 ordered_map_from_effective.reserve(alphabet_size);
228 ordered_map_from_effective.resize(alphabet_size);
230 for(
size_t i = 0; i < alphabet_size; ++i) {
231 ordered_codelengths[i] = codelengths[codeword_order[i]];
232 ordered_map_from_effective[i] = map_from_effective[codeword_order[i]];
236 auto numl = gen_numl(ordered_codelengths, alphabet_size, longest);
237 auto codewords = gen_codewords(ordered_codelengths, alphabet_size, numl, longest);
240 m_codewords = std::move(codewords);
241 m_ordered_codelengths = std::move(ordered_codelengths);
247 template<
typename input_t>
250 for (
size_t i = 0; i < inp.size(); i++) {
251 max = std::max(max, inp[i]);
253 size_t alphabet_size = max + 1;
256 counts.reserve(alphabet_size);
257 counts.resize(alphabet_size);
258 for (
size_t i = 0; i < inp.size(); i++) {
264 size_t shrunk_alphabet_size = 0;
265 for (
size_t i = 0; i < counts.size(); i++) {
266 shrunk_alphabet_size += (counts[i] != 0);
269 if (shrunk_alphabet_size <= 1) {
272 gen_huffmantable(std::move(counts), shrunk_alphabet_size);
277 inline static void huffman_encode(
282 const size_t alphabet_size,
285 const size_t char_value = input;
286 const size_t effective_char = ordered_map_to_effective[char_value];
287 DCHECK_LT(effective_char, alphabet_size);
288 os.
write_int(codewords[effective_char], ordered_codelengths[effective_char]);
289 DVLOG(2) <<
" codeword " 290 << codewords[effective_char]
291 <<
" length " << int(ordered_codelengths[effective_char])
300 for(
size_t i = 0; i < table.
m_longest; ++i) {
318 numl.reserve(longest);
319 numl.resize(longest);
321 for(
size_t i = 0; i < longest; ++i) {
327 ordered_map_from_effective.reserve(alphabet_size);
328 ordered_map_from_effective.resize(alphabet_size);
329 for(
size_t i = 0; i < alphabet_size; ++i) {
330 ordered_map_from_effective[i] = in.
read_int<
size_t>(
bits_for(real_size - 1));
334 std::move(ordered_map_from_effective),
343 const size_t alphabet_size,
const Numl& numl,
const size_t longest)
346 ordered_codelengths.reserve(alphabet_size);
347 ordered_codelengths.resize(alphabet_size);
348 for(
size_t i = 0, k = 0; i < longest; ++i) {
349 for(
size_t j = 0; j < numl[i]; ++j) {
350 DCHECK_LT(k, alphabet_size);
351 ordered_codelengths[k++] = i + 1;
354 return ordered_codelengths;
359 size_t alphabet_size,
363 prefix_sum_lengths.reserve(longest);
364 prefix_sum_lengths.resize(longest);
371 prefix_sum_lengths[ordered_codelengths[0] - 1] = 0;
372 for(
size_t i = 1; i < alphabet_size; ++i) {
373 if(ordered_codelengths[i - 1] < ordered_codelengths[i]) {
374 prefix_sum_lengths[ordered_codelengths[i] - 1] = i;
377 DVLOG(2) <<
"ordered_codelengths : " 380 DVLOG(2) <<
"prefix_sum_lengths : " 383 return prefix_sum_lengths;
386 inline static size_t huffman_decode(
397 value = (value << 1) + is.
read_bit();
399 }
while(value < firstcodes[length - 1]);
401 DVLOG(2) <<
" codeword " << value <<
" length " << length <<
"\t";
404 return ordered_map_from_effective[prefix_sum_lengths[length]+ (value - firstcodes[length]) ];
408 using namespace huff2;
412 std::shared_ptr<BitOStream> m_out;
416 template<
typename input_t>
418 const input_t& literals):
421 m_ordered_map_to_effective {
433 huffmantable_encode(*m_out, m_table);
439 m_out->write_int<
size_t>(v);
448 std::shared_ptr<BitIStream> m_in;
457 if(!m_in->read_bit()) {
466 m_prefix_sum_lengths = gen_prefix_sum_lengths(
467 std::move(ordered_codelengths),
474 if(m_ordered_map_from_effective.empty()) {
475 return m_in->read_int<
size_t>();
477 return huffman_decode(*m_in,
478 m_ordered_map_from_effective,
479 m_prefix_sum_lengths,
std::vector< size_t > Codelengths
T read_compressed_int(size_t b=7)
Reads a compressed integer from the input.
Contains the text compression and encoding framework.
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
HuffmanEncoder(const std::shared_ptr< BitOStream > &out, const input_t &literals)
std::vector< size_t > Firstcode
ExtendedHuffmantable(const input_t &inp)
std::vector< size_t > OrderedCodelengths
std::string vec_to_debug_string(const T &s, size_t indent=0)
Builds the string representation of a vector of byte values, sorrounded by square brackets ([ and ])...
size_t m_real_alphabet_size
void gen_huffmantable(Counts &&counts, size_t alphabet_size)
Wrapper for input streams that provides bitwise reading functionality.
OrderedCodelengths m_ordered_codelengths
OrderedMapFromEffective m_ordered_map_from_effective
std::vector< size_t > Counts
std::vector< size_t > PrefixSumLengths
std::vector< size_t > Numl
Encodes data to an ASCII character stream.
std::vector< size_t > MapFromEffective
Wrapper for output streams that provides bitwise writing functionality.
HuffmanDecoder(const std::shared_ptr< BitIStream > &in)
std::vector< size_t > Codewords
void write_compressed_int(T v, size_t b=7)
Writes a compressed integer to the input.
std::vector< size_t > OrderedMapFromEffective
size_t m_effective_alphabet_size
void write_int(T value, size_t bits=sizeof(T) *CHAR_BIT)
Writes the bit representation of an integer in MSB first order to the output.
std::vector< size_t > OrderedMapToEffective
bool eof() const
TODO document.
uint8_t read_bit()
Reads the next single bit from the input.
T read_int(size_t amount=sizeof(T) *CHAR_BIT)
Reads the integer value of the next amount bits in MSB first order.
Decodes data from an Huffman character stream.