10 namespace tdc {
namespace esp {
15 Meta m(
"d_coding",
"huffman");
22 template<
typename rhs_t>
23 inline void encode(
const rhs_t& rhs, std::shared_ptr<BitOStream>& out,
size_t bit_width,
size_t max_value)
const {
26 for (
size_t i = 0; i < rhs.size(); i++) {
30 template<
typename rhs_t>
31 inline void decode(rhs_t& rhs, std::shared_ptr<BitIStream>& in,
size_t bit_width,
size_t max_value)
const {
34 for (
size_t i = 0; i < rhs.size(); i++) {
42 Meta m(
"d_coding",
"arithmetic");
49 template<
typename rhs_t>
50 inline void encode(
const rhs_t& rhs, std::shared_ptr<BitOStream>& out,
size_t bit_width,
size_t max_value)
const {
53 for (
size_t i = 0; i < rhs.size(); i++) {
57 template<
typename rhs_t>
58 inline void decode(rhs_t& rhs, std::shared_ptr<BitIStream>& in,
size_t bit_width,
size_t max_value)
const {
61 for (
size_t i = 0; i < rhs.size(); i++) {
62 rhs[i] = decoder.decode();
69 Meta m(
"d_coding",
"plain");
75 template<
typename rhs_t>
76 inline void encode(
const rhs_t& rhs,
BitOStream& out,
size_t bit_width,
size_t max_value)
const {
77 for(
size_t i = 0; i < rhs.size(); i++) {
81 template<
typename rhs_t>
82 inline void decode(rhs_t& rhs,
BitIStream& in,
size_t bit_width,
size_t max_value)
const {
83 for(
size_t i = 0; i < rhs.size(); i++) {
84 rhs[i] = in.
read_int<
size_t>(bit_width);
87 template<
typename rhs_t>
88 inline void encode(
const rhs_t& rhs, std::shared_ptr<BitOStream>& out,
size_t bit_width,
size_t max_value)
const {
89 encode(rhs, *out, bit_width, max_value);
91 template<
typename rhs_t>
92 inline void decode(rhs_t& rhs, std::shared_ptr<BitIStream>& in,
size_t bit_width,
size_t max_value)
const {
93 decode(rhs, *in, bit_width, max_value);
99 Meta m(
"d_coding",
"wavelet_tree");
105 template<
typename rhs_t>
106 inline void encode(
const rhs_t& rhs,
BitOStream& bout,
size_t bit_width,
size_t max_value)
const {
107 auto bvs =
make_wt(rhs, max_value);
114 for(
auto& bv : bvs) {
115 for(uint8_t bit: bv) {
121 template<
typename rhs_t>
122 inline void decode(rhs_t& rhs,
BitIStream& bin,
size_t bit_width,
size_t max_value)
const {
126 auto Dcombined_bvs = std::vector<IntVector<uint_t<1>>>();
127 Dcombined_bvs.reserve(wt_depth);
128 Dcombined_bvs.resize(wt_depth);
130 for(
auto& bv : Dcombined_bvs) {
131 bv.reserve(rhs.size());
133 for(
size_t i = 0; i < rhs.size(); i++) {
139 for (
size_t i = 0; i < rhs.size(); i++) {
140 rhs[i] = Dcombined[i];
144 template<
typename rhs_t>
145 inline void encode(
const rhs_t& rhs, std::shared_ptr<BitOStream>& out,
size_t bit_width,
size_t max_value)
const {
146 encode(rhs, *out, bit_width, max_value);
148 template<
typename rhs_t>
149 inline void decode(rhs_t& rhs, std::shared_ptr<BitIStream>& in,
size_t bit_width,
size_t max_value)
const {
150 decode(rhs, *in, bit_width, max_value);
153 template<
typename subseq_t = SubSeqOptimal,
typename d_coding_t = DWaveletTree>
157 Meta m(
"d_coding",
"succinct");
164 template<
typename rhs_t>
165 inline void encode(
const rhs_t& rhs, std::shared_ptr<BitOStream>& out,
size_t bit_width,
size_t max_value)
const {
175 phase1.split(
"Write out B array");
180 for (
size_t i = 0; i < rhs.size(); i++) {
181 auto v = rhs[sis[i]];
184 size_t diff = v - last;
190 phase1.split(
"Create Dpi and b arrays");
193 std::vector<size_t> Dpi;
197 auto tmp = subseq.create_dpi_and_b_from_sorted_indices(sis);
198 Dpi = std::move(tmp.Dpi);
199 b = std::move(tmp.b);
201 size_t b_size = b.size();
202 phase1.log_stat(
"Subsequence count", b_size);
205 DCHECK_GE(b_size, 1);
210 phase1.split(
"Write out b, discard");
219 phase1.split(
"Create Dsi from Dpi");
223 phase1.split(
"Combine Dsi and Dpi arrays");
225 auto Dcombined = std::move(Dpi);
226 Dcombined.reserve(Dcombined.size() * 2);
228 Dcombined.push_back(x);
230 Dsi = std::vector<size_t>();
232 phase1.split(
"Encode Dcombined");
236 auto d_max_value = b_size - 1;
237 auto d_bit_width =
bits_for(d_max_value);
239 coder.encode(Dcombined, out, d_bit_width, d_max_value);
241 template<
typename rhs_t>
242 inline void decode(rhs_t& D, std::shared_ptr<BitIStream>& in,
size_t bit_width,
size_t max_value)
const {
244 auto slp_size = D.size();
248 std::vector<size_t> Bde;
249 Bde.reserve(slp_size);
252 for(
size_t i = 0; i < slp_size && !bin.
eof(); i++) {
266 for(
size_t i = 0; i < b_size; i++) {
272 auto Dcombined = std::vector<size_t>();
273 Dcombined.reserve(slp_size * 2);
274 Dcombined.resize(slp_size * 2);
278 auto d_max_value = b_size - 1;
279 auto d_bit_width =
bits_for(d_max_value);
281 coder.decode(Dcombined, in, d_bit_width, d_max_value);
291 template<
typename vec_t>
294 const size_t bit_width,
295 const size_t diff_bit_width,
299 auto cvec = [&](
size_t i) ->
size_t {
300 return size_t(
typename vec_t::value_type(vec[i]));
303 uint64_t written_bits = 0;
307 uint64_t unary_sep_bit_counter = 0;
308 uint64_t unary_val_bit_counter = 0;
309 uint64_t unary_sign_bit_counter = 0;
312 for(
size_t i = 0; i < vec.size(); i++) {
313 const size_t current = cvec(i);
315 if (current >= last) {
316 diff = current - last;
318 diff = last - current;
321 unary_sep_bit_counter += 1;
322 unary_val_bit_counter += diff;
324 unary_sign_bit_counter += 1;
329 uint64_t diff_val_counter = unary_sign_bit_counter;
330 if (vec.size() > 0 && cvec(0) == 0) {
334 const uint64_t bits_unary
335 = unary_sep_bit_counter + unary_val_bit_counter + unary_sign_bit_counter;
336 const uint64_t bits_binary
337 = (diff_val_counter) * (bit_width + diff_bit_width)
340 phase.log_stat(
"predicted unary bits", bits_unary);
341 phase.log_stat(
"predicted binary bits", bits_binary);
343 const bool use_unary = (bits_unary <= bits_binary);
344 out.write_bit(use_unary);
347 phase.log_stat(
"use unary", use_unary);
358 size_t diff_sign_bits = 0;
359 size_t sign_bits = 0;
362 for(
size_t i = 0; i < vec.size(); i++) {
363 const size_t current = cvec(i);
365 if (current >= last) {
366 diff = current - last;
368 diff = last - current;
370 out.write_unary(diff);
371 written_bits += (diff + 1);
373 diff_sign_bits += (diff + 1);
375 phase.log_stat(
"diff bits", diff_sign_bits);
379 for(
size_t i = 0; i < vec.size(); i++) {
380 const size_t current = cvec(i);
382 if (last < current) {
386 }
else if (last > current) {
387 out.write_bit(
false);
394 phase.log_stat(
"sign bits", sign_bits);
397 CHECK_EQ(diff_sign_bits + sign_bits, bits_unary);
399 size_t out_counter = 0;
400 size_t last_value = 0;
401 size_t value_counter = 0;
402 size_t binary_bits = 0;
404 for(
size_t i = 0; i < vec.size(); i++) {
405 const size_t current = cvec(i);
407 if (current == last_value) {
411 if (value_counter > 0) {
412 out.write_int<
size_t>(value_counter, bit_width);
413 out.write_int<
size_t>(last_value, diff_bit_width);
414 binary_bits += (bit_width + diff_bit_width);
415 written_bits += (bit_width + diff_bit_width);
419 last_value = current;
423 if (value_counter > 0) {
424 out.write_int<
size_t>(value_counter, bit_width);
425 out.write_int<
size_t>(last_value, diff_bit_width);
426 binary_bits += (bit_width + diff_bit_width);
427 written_bits += (bit_width + diff_bit_width);
431 phase.log_stat(
"binary bits", binary_bits);
433 CHECK_EQ(binary_bits, bits_binary);
434 CHECK_EQ(out_counter, diff_val_counter);
439 template<
typename vec_t>
442 const size_t bit_width,
443 const size_t diff_bit_width,
445 auto cvec = [&](
size_t i) ->
size_t {
446 return size_t(
typename vec_t::value_type(vec[i]));
449 const bool use_unary = in.
read_bit();
452 for(
size_t i = 0; i < vec.size(); i++) {
457 for(
size_t i = 0; i < vec.size(); i++) {
458 const size_t diff = cvec(i);
474 while(vec_i < vec.size()) {
475 const size_t repeats = in.
read_int<
size_t>(bit_width);
476 const size_t val = in.
read_int<
size_t>(diff_bit_width);
477 const size_t vec_i_end = vec_i + repeats;
478 for (; vec_i < vec_i_end; vec_i++) {
488 Meta m(
"d_coding",
"diff");
494 template<
typename rhs_t>
495 inline void encode(
const rhs_t& rhs,
BitOStream& out,
size_t bit_width,
size_t max_value)
const {
499 template<
typename rhs_t>
503 template<
typename rhs_t>
504 inline void encode(
const rhs_t& rhs, std::shared_ptr<BitOStream>& out,
size_t bit_width,
size_t max_value)
const {
505 encode(rhs, *out, bit_width, max_value);
507 template<
typename rhs_t>
508 inline void decode(rhs_t& rhs, std::shared_ptr<BitIStream>& in,
size_t bit_width,
size_t max_value)
const {
509 decode(rhs, *in, bit_width, max_value);
514 inline static bool perc_diff(
double a,
double b,
double diff) {
515 auto r = std::abs((a - b) / ((a + b) / 2.0)) <= diff;
525 Meta m(
"d_coding",
"range_fit");
534 template<
typename rhs_t>
535 inline void encode(
const rhs_t& rhs,
BitOStream& out,
size_t bit_width,
size_t max_value)
const {
537 const size_t size = rhs.size();
540 double threshold = 0.0;
542 threshold = double(
env().option(
"threshold").as_integer()) / 100.0;
547 std::vector<size_t> mins;
551 size_t min = size_t(-1);
552 for(
size_t i = 0; i < size; i++) {
553 const size_t j = size - i - 1;
554 const size_t current = rhs[j];
564 for(
size_t i = 0; i < size; i++) {
565 if (perc_diff(mins[i], last, threshold)) {
566 DCHECK_GE(mins[i], last);
579 size_t last_min_flush = 0;
580 for(
size_t i = 0; i < size; i++) {
581 const size_t current = rhs[i];
588 const size_t min = mins[i];
589 const size_t min_max_bits =
bits_for(max - min);
590 const size_t max_bits =
bits_for(max - 0);
591 if (max_bits == min_max_bits && last_min_flush == 0) {
594 last_min_flush = mins[i];
597 const size_t min = mins[i];
598 const size_t range = max - min;
602 uint64_t writte_bits_mins =
604 uint64_t written_bits_bit_range =
607 uint64_t written_bits_values = 0;
608 for(
size_t i = 0; i < size; i++) {
609 const size_t current = rhs[i];
610 const size_t min = mins[i];
611 const size_t compact_value = current - min;
612 const size_t bits = size_t(
uint_t<6>(bit_ranges[i]));
613 written_bits_values += bits;
615 out.
write_int<
size_t>(compact_value, bits);
618 phase.log_stat(
"writte_bits_mins", writte_bits_mins);
619 phase.log_stat(
"written_bits_bit_range", written_bits_bit_range);
620 phase.log_stat(
"written_bits_values", written_bits_values);
621 phase.log_stat(
"written_bits_all",
622 writte_bits_mins + written_bits_bit_range + written_bits_values
624 phase.log_stat(
"written_bits_all_plain", rhs.size() * bit_width);
625 phase.log_stat(
"elements", rhs.size());
626 phase.log_stat(
"max_bit_width", bit_width);
628 std::vector<size_t> maxs;
633 for(
size_t i = 0; i < size; i++) {
634 const size_t current = rhs[i];
643 size_t last = size_t(-1);
644 for(
size_t i = 0; i < size; i++) {
645 const size_t j = size - i - 1;
647 if (perc_diff(maxs[j], last, threshold)) {
648 DCHECK_LE(maxs[j], last);
655 std::vector<size_t> ranges;
656 ranges.reserve(size);
659 size_t last_min_flush = 0;
660 for(
size_t i = 0; i < size; i++) {
662 const size_t bits_min_max =
bits_for(maxs[i] - mins[i]);
663 const size_t bits_max =
bits_for(maxs[i] - 0);
664 if (bits_min_max == bits_max && last_min_flush == 0) {
667 last_min_flush = mins[i];
670 ranges[i] = maxs[i] - mins[i];
675 for(
size_t i = 0; i < size; i++) {
676 const size_t j = size - i - 1;
678 if (ranges[j] < last) {
679 if (perc_diff(ranges[j], last, threshold)) {
680 DCHECK_LE(ranges[j], last);
689 for(
size_t i = 0; i < size; i++) {
690 if (ranges[i] < last) {
691 if (perc_diff(ranges[i], last, threshold)) {
692 DCHECK_LE(ranges[i], last);
703 size_t range_chunk_i = 0;
704 while (range_chunk_i < ranges.size()) {
705 size_t range_chunk_j = range_chunk_i;
706 while(range_chunk_j < ranges.size()
707 && (ranges[range_chunk_j] == ranges[range_chunk_i])) {
712 const auto range = ranges[range_chunk_i];
715 const std::vector<size_t>* mins;
720 inline size_t operator[](
size_t x)
const {
721 return (*vals)[x + i] - (*mins)[x + i];
724 inline size_t size()
const {
729 auto cv = ChunkView {
739 DCHECK_EQ(bvs.size(),
bits_for(range) - 1);
741 DCHECK_EQ(bvs.size(),
bits_for(range));
744 for(
const auto& bv : bvs) {
747 for(
size_t i = 0; i < cv.size(); i++) {
748 size_t j = cv.size() - i - 1;
758 for(
size_t i = 0; i < cv.size() - tnull; i++) {
764 range_chunk_i = range_chunk_j;
768 template<
typename rhs_t>
770 const size_t size = rhs.size();
773 std::vector<size_t> mins;
786 for(
size_t i = 0; i < size; i++) {
790 std::vector<size_t> ranges;
791 ranges.reserve(size);
796 size_t range_chunk_i = 0;
797 while (range_chunk_i < ranges.size()) {
798 size_t range_chunk_j = range_chunk_i;
799 while(range_chunk_j < ranges.size()
800 && (ranges[range_chunk_j] == ranges[range_chunk_i])) {
805 const auto range = ranges[range_chunk_i];
806 size_t cv_size = range_chunk_j - range_chunk_i;
816 std::vector<IntVector<uint_t<1>>> bvs(bv_c);
817 for(
size_t bv_i = 0; bv_i < bv_c; bv_i++) {
818 auto& bv = bvs[bv_i];
822 for(
size_t i = 0; i < cv_size - tnull; i++) {
825 for(
size_t i = cv_size - tnull; i < cv_size; i++) {
832 for(
size_t i = range_chunk_i; i < range_chunk_j; i++) {
833 rhs[i] = vec[i - range_chunk_i] + mins[i];
837 range_chunk_i = range_chunk_j;
841 template<
typename rhs_t>
842 inline void encode(
const rhs_t& rhs, std::shared_ptr<BitOStream>& out,
size_t bit_width,
size_t max_value)
const {
843 encode(rhs, *out, bit_width, max_value);
845 template<
typename rhs_t>
846 inline void decode(rhs_t& rhs, std::shared_ptr<BitIStream>& in,
size_t bit_width,
size_t max_value)
const {
847 decode(rhs, *in, bit_width, max_value);
T read_compressed_int(size_t b=7)
Reads a compressed integer from the input.
Contains the text compression and encoding framework.
void decode(rhs_t &rhs, std::shared_ptr< BitIStream > &in, size_t bit_width, size_t max_value) const
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
Encodes data to an ASCII character stream.
std::vector< Sindex > sorted_indices(const View &input)
void encode(const rhs_t &rhs, std::shared_ptr< BitOStream > &out, size_t bit_width, size_t max_value) const
A vector over arbitrary unsigned integer types.
void write_bit(bool set)
Writes a single bit to the output.
Env env_for_option(const std::string &option) const
Create the environment for a sub algorithm option.
const std::string & as_string() const
A const view into a slice of memory.
void decode(rhs_t &rhs, std::shared_ptr< BitIStream > &in, size_t bit_width, size_t max_value) const
const OptionValue & option(const std::string &option) const
Get an option of this algorithm.
void push_back(const value_type &val)
void encode(const rhs_t &rhs, std::shared_ptr< BitOStream > &out, size_t bit_width, size_t max_value) const
Decodes data from an Arithmetic character stream.
auto make_wt(const Dxx_t &v, size_t max_char) -> std::vector< IntVector< uint_t< 1 >>>
void write_unary(value_t v)
void encode(const rhs_t &rhs, std::shared_ptr< BitOStream > &out, size_t bit_width, size_t max_value) const
Wrapper for input streams that provides bitwise reading functionality.
void decode(rhs_t &rhs, BitIStream &bin, size_t bit_width, size_t max_value) const
void encode(const rhs_t &rhs, std::shared_ptr< BitOStream > &out, size_t bit_width, size_t max_value) const
auto recover_D_from_encoding(const Dxx_t &Dpi, const Dxx_t &Dsi, const b_t &b, const Bde_t &Bde, D_t *out)
void encode(const rhs_t &rhs, BitOStream &out, size_t bit_width, size_t max_value) const
Provides access to runtime and memory measurement in statistics phases.
Algorithm(Algorithm const &)=default
void decode(rhs_t &D, std::shared_ptr< BitIStream > &in, size_t bit_width, size_t max_value) const
auto recover_Dxx(const std::vector< IntVector< uint_t< 1 >>> &bvs, size_t size) -> std::vector< size_t >
void encode(const rhs_t &rhs, BitOStream &out, size_t bit_width, size_t max_value) const
void decode(rhs_t &rhs, std::shared_ptr< BitIStream > &in, size_t bit_width, size_t max_value) const
void encode(const rhs_t &rhs, std::shared_ptr< BitOStream > &out, size_t bit_width, size_t max_value) const
Encodes data to an ASCII character stream.
typename uint_dispatch_t< N >::type uint_t
Env & env()
Provides access to the environment that the algorithm works in.
Wrapper for output streams that provides bitwise writing functionality.
void decode(rhs_t &rhs, BitIStream &in, size_t bit_width, size_t max_value) const
void decode(rhs_t &rhs, std::shared_ptr< BitIStream > &in, size_t bit_width, size_t max_value) const
std::vector< size_t > create_dsigma_from_dpi_and_sorted_indices(const SortedIndices &sorted_indices, const Dpi_t &Dpi)
void encode(const rhs_t &rhs, std::shared_ptr< BitOStream > &out, size_t bit_width, size_t max_value) const
void encode(const rhs_t &rhs, BitOStream &bout, size_t bit_width, size_t max_value) const
void write_compressed_int(T v, size_t b=7)
Writes a compressed integer to the input.
void encode(const rhs_t &rhs, BitOStream &out, size_t bit_width, size_t max_value) const
void decode(rhs_t &rhs, std::shared_ptr< BitIStream > &in, size_t bit_width, size_t max_value) const
void decode_unary_diff(vec_t &vec, BitIStream &in, const size_t bit_width, const size_t diff_bit_width, const bool sign)
void write_int(T value, size_t bits=sizeof(T) *CHAR_BIT)
Writes the bit representation of an integer in MSB first order to the output.
auto encode_unary_diff(vec_t &vec, BitOStream &out, const size_t bit_width, const size_t diff_bit_width, const bool sign, StatPhase &phase) -> uint64_t
void reserve(size_type n)
void decode(rhs_t &rhs, BitIStream &in, size_t bit_width, size_t max_value) const
bool eof() const
TODO document.
uint8_t read_bit()
Reads the next single bit from the input.
void decode(rhs_t &rhs, BitIStream &in, size_t bit_width, size_t max_value) const
Interface for algorithms.
void decode(rhs_t &rhs, std::shared_ptr< BitIStream > &in, size_t bit_width, size_t max_value) const
T read_int(size_t amount=sizeof(T) *CHAR_BIT)
Reads the integer value of the next amount bits in MSB first order.
Decodes data from an Huffman character stream.
void encode(const rhs_t &rhs, std::shared_ptr< BitOStream > &out, size_t bit_width, size_t max_value) const