doxygen/_esp_context_impl_8hpp_source.html

 #pragma once

 #include <tudocomp_stat/StatPhase.hpp>

 #include <tudocomp/compressors/esp/EspContext.hpp>
 #include <tudocomp/compressors/esp/RoundContext.hpp>
 #include <tudocomp/compressors/esp/meta_blocks.hpp>

 #include <tudocomp/compressors/esp/utils.hpp>

 namespace tdc {namespace esp {
     /// \brief Builds an SLP from \p input by processing it in successive rounds.
     ///
     /// Round 0 copies \p input symbol by symbol into `round->string`. Every
     /// round then splits its current string into blocks (`ctx.split()` /
     /// `ctx.adjusted_blocks()`), registers each block with the round's
     /// `GrammarRules` instance and collects the resulting rule names as the
     /// string of the next round. The rules of each round are appended to
     /// `slp.rules`. The loop ends as soon as the current string has zero or
     /// one symbols.
     ///
     /// \tparam T Type of the input. The code below requires `input.size()`
     ///           and range-based iteration over \p input.
     /// \param input The symbols to build the grammar for. After it has been
     ///              copied, a local is move-constructed from it, so a
     ///              non-const lvalue argument is left in a moved-from state.
     /// \return The resulting SLP. `empty` is `true` iff the loop ended on a
     ///         string of size 0; in that case `root_rule` keeps its initial
     ///         value 0. Otherwise `root_rule` is the single remaining symbol
     ///         plus the offset of the round that produced it.
     template<typename ipd_t>
     template<typename T>
     SLP EspContext<ipd_t>::generate_grammar(T&& input) {
         size_t root_node = 0;
         bool empty = false;

         SLP slp;
         // slp_counter starts at 256 and grows by the rule count of each round;
         // presumably the ids below 256 stand for the byte alphabet - TODO confirm.
         size_t slp_counter = 256;
         // Value slp_counter had before the previous round's rules were added
         // (0 while round 0 is running). Used to shift round-local symbols.
         size_t prev_slp_counter = 0;

         // State of the round currently being processed (rules, alphabet size, string).
         std::unique_ptr<Round<ipd_t>> round;

         // Set up round 0: copy the input into the round's string.
         {
             // The phase object stays alive until the end of this scope.
             // `with_env` is declared elsewhere; its `env` argument is not used here.
             auto phase = with_env([&](auto& env) {
                 return StatPhase("Prepare round 0");
             });

             round = std::make_unique<Round<ipd_t>>(Round<ipd_t> {
                 GrammarRules<ipd_t>(256),
                 256, // TODO: Calc actual alphabet size
                 IntVector<dynamic_t>(),
             });
             // Fixed element width of bits_for(255) for the round 0 string,
             // matching the hard-coded alphabet size of 256 above.
             round->string.width(bits_for(256 - 1));
             round->string.reserve(input.size(), bits_for(256 - 1));
             for (auto c : input) {
                 round->string.push_back(c);
             }
             // Move the input into a local that dies at the end of this scope;
             // presumably done to release the input's memory early - TODO confirm.
             auto discard = std::move(input);
         }

         // One iteration per round; the loop is only left through the two
         // `break`s below (string of size 0 or 1).
         for(size_t n = 0;; n++) {
             // Per-round phase named "Round <n>"; the stats at the end of the
             // iteration are logged to it.
             auto phase = with_env([&](auto& env) {
                 std::stringstream ss;
                 ss << "Round " << n;
                 return StatPhase(ss.str());
             });

             auto& r = *round;
             // `in` is created from r.string; NOTE(review): in_t looks like a
             // non-owning slice type, so it presumably must not outlive r.string.
             in_t in = r.string;

             esp::RoundContext<in_t> ctx {
                 r.alphabet,
                 in,
                 behavior_metablocks_maximimze_repeating,
                 behavior_landmarks_tie_to_right,
                 debug.round(),
             };

             ctx.debug.init(n, in, r.alphabet);

             // Termination case 1: nothing to encode at all.
             if (in.size() == 0) {
                 empty = true;
                 ctx.debug.last_round(0, true);
                 break;
             }
             // Termination case 2: a single symbol is left; it becomes the root
             // after shifting it by the offset of the round that produced it.
             if (in.size() == 1) {
                 root_node = in[0] + prev_slp_counter;
                 ctx.debug.last_round(root_node, false);
                 break;
             }

             // String of the next round. Room for in.size() / 2 + 1 entries is
             // reserved up front; the DCHECK in the loop below verifies that
             // pushing never changes the capacity.
             IntVector<dynamic_t> new_layer;
             size_t new_layer_width = bits_for(in.size() - 1);
             new_layer.width(new_layer_width);
             new_layer.reserve(in.size() / 2 + 1, new_layer_width);

             ctx.split(in);

             const auto& v = ctx.adjusted_blocks();

             ctx.debug.slice_symbol_map_start();
             {
                 // Walk over the string block by block: `s` is the not yet
                 // consumed rest, `slice` the current block of length e.len.
                 in_t s = in;
                 for (auto e : v) {
                     auto slice = s.slice(0, e.len);
                     s = s.slice(e.len);
                     // Register the block and rebase the value returned by add()
                     // by (initial_counter() - 1) to get the round-local rule name.
                     auto rule_name = r.gr.add(slice) - (r.gr.initial_counter() - 1);

                     ctx.debug.slice_symbol_map(slice, rule_name);

                     // Check that push_back did not change the capacity, i.e.
                     // that the reservation above was sufficient.
                     auto old_cap = new_layer.capacity();
                     new_layer.push_back(rule_name);
                     auto new_cap = new_layer.capacity();
                     DCHECK_EQ(old_cap, new_cap);
                 }
             }

             // Replace the round's string by a fresh empty vector; the DCHECKs
             // verify that size and capacity are 0 afterwards.
             r.string = IntVector<dynamic_t>();

             DCHECK_EQ(r.string.size(), 0);
             DCHECK_EQ(r.string.capacity(), 0);

             new_layer.shrink_to_fit();

             // Append this round's rules to slp.rules.
             {
                 size_t old_slp_size = slp.rules.size();
                 size_t additional_slp_size = r.gr.rules_count();
                 size_t new_slp_size = old_slp_size + additional_slp_size;

                 slp.rules.reserve(new_slp_size);
                 slp.rules.resize(new_slp_size);

                 auto& rv = slp.rules;

                 r.gr.for_all([&](const auto& k, const auto& val_) {
                     // Rebase the stored value by initial_counter().
                     const auto& val = val_ - r.gr.initial_counter();
                     const auto& key = k.as_view();

                     // slp_counter is 256 plus the number of rules of all
                     // earlier rounds, so `slp_counter - 256` is the index of
                     // the first slot added for this round.
                     size_t store_idx = slp_counter + val - 256;
                     // Only key[0] and key[1] are stored, each shifted by
                     // prev_slp_counter.
                     rv[store_idx][0] = key[0] + prev_slp_counter;
                     rv[store_idx][1] = key[1] + prev_slp_counter;
                 });

                 prev_slp_counter = slp_counter;
                 slp_counter += additional_slp_size;
             }

             // Add this round's statistics to the running totals in `ipd_stats`
             // (not declared in this function; presumably a member of EspContext).
             auto round_ipd_stats = r.gr.stats();
             ipd_stats.ext_size2_total += round_ipd_stats.ext_size2_total;
             ipd_stats.ext_size3_total += round_ipd_stats.ext_size3_total;
             ipd_stats.ext_size3_unique += round_ipd_stats.ext_size3_unique;
             ipd_stats.int_size2_total += round_ipd_stats.int_size2_total;
             ipd_stats.int_size2_unique += round_ipd_stats.int_size2_unique;

             // Clear the rule container of this round; presumably this frees
             // its hash map before the next round allocates - TODO confirm.
             r.gr.clear();

             // Build the state of the next round: its counter start and its
             // alphabet size are both this round's rule count, and its string
             // is new_layer.
             // NOTE(review): rules_count() is read after clear(); this relies on
             // clear() not resetting the count - confirm against GrammarRules.
             auto tmp = Round<ipd_t> {
                 GrammarRules<ipd_t>(r.gr.rules_count()),
                 r.gr.rules_count(),
                 std::move(new_layer),
             };

             // Destroy the old round before allocating the new one. `r` refers
             // to the old round and must not be used from here on.
             round.reset();
             round = std::make_unique<Round<ipd_t>>(std::move(tmp));

             // Log the accumulated SLP size and this round's statistics.
             phase.log_stat("SLP size", slp.rules.size());
             phase.log_stat("ext_size2_total", round_ipd_stats.ext_size2_total);
             phase.log_stat("ext_size3_total", round_ipd_stats.ext_size3_total);
             phase.log_stat("ext_size3_unique", round_ipd_stats.ext_size3_unique);
             phase.log_stat("int_size2_total", round_ipd_stats.int_size2_total);
             phase.log_stat("int_size2_unique", round_ipd_stats.int_size2_unique);
         }

         slp.empty = empty;
         slp.root_rule = root_node;

         return slp;
     }
 }} // namespace esp, namespace tdc
tdc::int_vector::IntVector::width
uint8_t width() const
Definition: IntVector.hpp:296

tdc::esp::in_t
BitPackingVectorSlice< dynamic_t > in_t
Definition: HashArray.hpp:9

tdc
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11

tdc::bits_for
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
Definition: include/tudocomp/util.hpp:194

tdc::esp::GrammarRules::rules_count
size_t rules_count() const
Definition: GrammarRules.hpp:82

tdc::esp::SLP::rules
std::vector< std::array< size_t, 2 > > rules
Definition: SLP.hpp:13

tdc::esp::SLP::root_rule
size_t root_rule
Definition: SLP.hpp:14

tdc::esp::Round
Definition: Rounds.hpp:7

tdc::int_vector::IntVector
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175

meta_blocks.hpp

tdc::int_vector::IntVector::push_back
void push_back(const value_type &val)
Definition: IntVector.hpp:426

utils.hpp

tdc::esp::SLP
Definition: SLP.hpp:12

StatPhase.hpp

tdc::StatPhase
Provides access to runtime and memory measurement in statistics phases.
Definition: StatPhase.hpp:44

tdc::esp::DebugRoundContext::init
void init(size_t number, const X &string, size_t alphabet_size)
Definition: DebugContext.hpp:241

tdc::esp::RoundContext::debug
DebugRoundContext debug
Definition: RoundContext.hpp:12

RoundContext.hpp

tdc::int_vector::IntVector::shrink_to_fit
void shrink_to_fit()
Definition: IntVector.hpp:369

tdc::esp::RoundContext
Definition: RoundContext.hpp:11

tdc::esp::GrammarRules
Definition: GrammarRules.hpp:17

tdc::esp::SLP::empty
bool empty
Definition: SLP.hpp:15

tdc::int_vector::IntVector::capacity
size_type capacity() const
Definition: IntVector.hpp:345

tdc::int_vector::IntVector::reserve
void reserve(size_type n)
Definition: IntVector.hpp:357

EspContext.hpp

tdc::esp::EspContext::generate_grammar
SLP generate_grammar(T &&s)
Definition: EspContextImpl.hpp:14