17 std::vector<lz78::factorid_t> indices;
18 std::vector<uliteral_t> literals;
22 indices.push_back(index);
23 literals.push_back(literal);
24 std::vector<uliteral_t> buffer;
27 buffer.push_back(literal);
28 literal = literals[index - 1];
29 index = indices[index - 1];
33 for(
size_t i = buffer.size(); i > 0; i--) {
42 template <
typename coder_t,
typename dict_t>
45 using node_t =
typename dict_t::node_t;
53 m_dict_max_size(env.option(
"dict_size").as_integer())
65 const size_t n = input.
size();
66 const size_t reserved_size =
isqrt(n)*2;
72 IF_STATS(
size_t stat_dictionary_resets = 0);
73 IF_STATS(
size_t stat_dict_counter_at_last_reset = 0);
74 IF_STATS(
size_t stat_factor_count = 0);
75 size_t factor_count = 0;
77 size_t remaining_characters = n;
78 dict_t dict(env().env_for_option(
"lz78trie"), n, remaining_characters, reserved_size);
80 auto reset_dict = [&dict] () {
82 node_t node = dict.add_rootnode(0);
83 DCHECK_EQ(node.id(), dict.size() - 1);
84 DCHECK_EQ(node.id(), 0);
88 typename coder_t::Encoder coder(env().env_for_option(
"coder"), out,
NoLiterals());
91 node_t node = dict.get_rootnode(0);
93 DCHECK_EQ(node.id(), 0);
94 DCHECK_EQ(parent.id(), 0);
98 --remaining_characters;
99 node_t child = dict.find_or_insert(node, static_cast<uliteral_t>(c));
101 coder.encode(node.id(),
Range(factor_count));
102 coder.encode(static_cast<uliteral_t>(c),
literal_r);
105 parent = node = dict.get_rootnode(0);
106 DCHECK_EQ(node.id(), 0);
107 DCHECK_EQ(parent.id(), 0);
108 DCHECK_EQ(factor_count+1, dict.size());
115 IF_STATS(stat_dict_counter_at_last_reset = m_dict_max_size);
125 coder.encode(parent.id(),
Range(factor_count));
134 phase1.
log_stat(
"factor_count", stat_factor_count);
135 phase1.
log_stat(
"dictionary_reset_counter",
136 stat_dictionary_resets);
137 phase1.
log_stat(
"max_factor_counter",
138 stat_dict_counter_at_last_reset);
144 typename coder_t::Decoder decoder(env().env_for_option(
"coder"), input);
147 uint64_t factor_count = 0;
149 while (!decoder.eof()) {
Represents a generic range of positive integers.
Contains the text compression and encoding framework.
void decompress(lz78::factorid_t index, uliteral_t literal, std::ostream &out)
virtual void compress(Input &input, Output &out) override
Compress the given input to the given output.
#define tdc_unlikely(x)
Provides a hint to the compiler that x is expected to resolve to false.
uint8_t uliteral_t
Type to represent signed single literals.
Base for data compressors.
Defines data encoding to and decoding from a stream of binary integer representations.
virtual void decompress(Input &input, Output &output) override final
Decompress the given input to the given output.
Provides access to runtime and memory measurement in statistics phases.
#define LZ78_DICT_SIZE_DESC
OutputStream as_stream() const
Creates a stream that allows for character-wise output.
An empty literal iterator that yields no literals whatsoever.
LZ78Compressor(Env &&env)
Maximum dictionary size before reset, 0 == unlimited.
constexpr auto literal_r
Global predefined reange for literals.
An abstraction layer for algorithm output.
#define IF_STATS(x)
x is compiled only when the STATS_DISABLED macro is undefined.
void log_stat(const char *key, const T &value)
Logs a user statistic for this phase.
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Local environment for a compression/encoding/decompression call.
LZ78 Trie Implementation based on Julius Pettersson (MIT/Expat License.) and Juha Nieminen's work...