19 template<
typename coder_t,
typename dict_t>
22 using node_t =
typename dict_t::node_t;
28 m_dict_max_size(
env.option(
"dict_size").as_integer())
40 const size_t n = input.
size();
41 const size_t reserved_size =
isqrt(n)*2;
46 IF_STATS(
size_t stat_dictionary_resets = 0);
47 IF_STATS(
size_t stat_dict_counter_at_last_reset = 0);
48 IF_STATS(
size_t stat_factor_count = 0);
49 size_t factor_count = 0;
51 size_t remaining_characters = n;
52 dict_t dict(
env().env_for_option(
"lz78trie"), n, remaining_characters, reserved_size+
ULITERAL_MAX+1);
53 auto reset_dict = [&dict] () {
57 const node_t node = dict.add_rootnode(i);
58 DCHECK_EQ(node.id(), dict.size() - 1);
59 DCHECK_EQ(node.id(), i);
60 ss << node.id() <<
", ";
65 typename coder_t::Encoder coder(
env().env_for_option(
"coder"), out,
NoLiterals());
68 if(!is.get(c))
return;
70 node_t node = dict.get_rootnode(static_cast<uliteral_t>(c));
73 --remaining_characters;
74 node_t child = dict.find_or_insert(node, static_cast<uliteral_t>(c));
75 DVLOG(2) <<
" child " << child.id() <<
" #factor " << factor_count <<
" size " << dict.size() <<
" node " << node.id();
82 node = dict.get_rootnode(static_cast<uliteral_t>(c));
84 if(dict.size() == m_dict_max_size) {
85 DCHECK_GT(dict.size(),0);
89 IF_STATS(stat_dict_counter_at_last_reset = m_dict_max_size);
96 DLOG(INFO) <<
"End node id of LZW parsing " << node.id();
104 phase.
log_stat(
"factor_count", stat_factor_count);
105 phase.
log_stat(
"dictionary_reset_counter", stat_dictionary_resets);
106 phase.
log_stat(
"max_factor_counter", stat_dict_counter_at_last_reset);
111 const size_t reserved_size = input.
size();
114 typename coder_t::Decoder decoder(
env().env_for_option(
"coder"), input);
132 }, out, m_dict_max_size == 0 ?
lz78::DMS_MAX : m_dict_max_size, reserved_size);
Represents a generic range of positive integers.
Contains the text compression and encoding framework.
virtual void decompress(Input &input, Output &output) override final
Decompress the given input to the given output.
Base for data compressors.
void decode_step(F next_code_callback, std::ostream &out, const CodeType dms, const CodeType reserve_dms)
LZWCompressor(Env &&env)
Maximum dictionary size before reset, 0 == unlimited.
Defines data encoding to and decoding from a stream of binary integer representations.
Provides access to runtime and memory measurement in statistics phases.
#define LZ78_DICT_SIZE_DESC
constexpr size_t ULITERAL_MAX
The maximum value of uliteral_t.
Env & env()
Provides access to the environment that the algorithm works in.
OutputStream as_stream() const
Creates a stream that allows for character-wise output.
An empty literal iterator that yields no literals whatsoever.
An abstraction layer for algorithm output.
#define IF_STATS(x)
x is compiled only when the STATS_DISABLED macro is undefined.
virtual void compress(Input &input, Output &out) override
Compress the given input to the given output.
void log_stat(const char *key, const T &value)
Logs a user statistic for this phase.
constexpr size_t DMS_MAX
Maximum legal dictionary size.
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
constexpr factorid_t undef_id
Id that can be used for a non-existing factor.
Local environment for a compression/encoding/decompression call.
LZ78 Trie Implementation based on Julius Pettersson (MIT/Expat License.) and Juha Nieminen's work...