41 template<
typename literal_coder_t,
typename len_coder_t >
45 typedef std::tuple<uint,uint,uint> non_term;
46 typedef std::vector<non_term> non_terminal_symbols;
48 typedef std::vector<std::pair<uint,uint>> rules;
55 Meta m(
"lfs_comp_enc",
"lfs_enocde_strat");
66 DLOG(INFO) <<
"encoding dictionary symbol sizes ";
68 std::shared_ptr<BitOStream> bitout = std::make_shared<BitOStream>(output);
69 typename literal_coder_t::Encoder lit_coder(
70 env().env_for_option(
"lfs_lit_coder"),
74 typename len_coder_t::Encoder len_coder(
75 env().env_for_option(
"lfs_len_coder"),
80 auto it = dictionary.begin();
82 Range intrange (0, UINT_MAX);
83 if(dictionary.size() >=1 ){
85 std::pair<uint,uint> symbol = *it;
86 uint last_length=symbol.second;
87 Range s_length_r (0,last_length);
88 len_coder.encode(last_length,intrange);
91 while (it != dictionary.end()){
93 len_coder.encode(last_length-symbol.second,s_length_r);
94 last_length=symbol.second;
98 len_coder.encode(symbol.second,s_length_r);
100 len_coder.encode(0,intrange);
104 long buf_size = bitout->tellp();
110 DLOG(INFO) <<
"encoding dictionary symbols";
112 if(dictionary.size() >=1 ){
113 auto it = dictionary.begin();
114 std::pair<uint,uint> symbol;
116 while(it != dictionary.end()){
120 for(
uint k = 0; k<symbol.second; k++){
121 lit_coder.encode(in[symbol.first + k],
literal_r);
130 buf_size = long(bitout->tellp()) - buf_size;
135 Range dict_r(0, dictionary.size());
141 bool first_char =
true;
142 for(
auto it = nts_symbols.begin(); it!= nts_symbols.end(); it++){
145 std::tuple<uint,uint,uint> next_position = *it;
147 start_position = std::get<0>(next_position);
148 symbol_number =std::get<1>(next_position);
149 symbol_length = std::get<2>(next_position);
151 while(pos< start_position){
153 lit_coder.encode(0,
bit_r);
166 lit_coder.encode(1,
bit_r);
167 lit_coder.encode(symbol_number, dict_r);
169 pos += symbol_length;
173 while( pos<in.
size()){
174 lit_coder.encode(0,
bit_r);
181 buf_size = long(bitout->tellp()) - buf_size;
186 DLOG(INFO) <<
"compression with lfs done";
192 DLOG(INFO) <<
"decompress lfs";
193 std::shared_ptr<BitIStream> bitin = std::make_shared<BitIStream>(input);
195 typename literal_coder_t::Decoder lit_decoder(
196 env().env_for_option(
"lfs_lit_coder"),
199 typename len_coder_t::Decoder len_decoder(
200 env().env_for_option(
"lfs_len_coder"),
203 Range int_r (0,UINT_MAX);
205 uint symbol_length = len_decoder.template decode<uint>(int_r);
206 Range slength_r (0, symbol_length);
207 std::vector<uint> dict_lengths;
208 dict_lengths.reserve(symbol_length);
209 dict_lengths.push_back(symbol_length);
210 while(symbol_length>0){
212 uint current_delta = len_decoder.template decode<uint>(slength_r);
213 symbol_length-=current_delta;
214 dict_lengths.push_back(symbol_length);
216 dict_lengths.pop_back();
218 std::vector<std::string> dictionary;
219 uint dictionary_size = dict_lengths.size();
221 Range dictionary_r (0, dictionary_size);
224 uint length_of_symbol;
225 std::string non_terminal_symbol;
226 DLOG(INFO) <<
"reading dictionary";
227 for(
uint i = 0; i< dict_lengths.size();i++){
228 non_terminal_symbol =
"";
230 length_of_symbol=dict_lengths[i];
231 for(
uint i =0; i< length_of_symbol;i++){
232 c1 = lit_decoder.template decode<char>(
literal_r);
233 non_terminal_symbol += c1;
235 dictionary.push_back(non_terminal_symbol);
239 while(!lit_decoder.eof()){
241 bool bit1 = lit_decoder.template decode<bool>(
bit_r);
246 c1 = lit_decoder.template decode<char>(
literal_r);
251 symbol_number = lit_decoder.template decode<uint>(dictionary_r);
253 if(symbol_number < dictionary.size()){
255 ostream << dictionary.at(symbol_number);
257 DLOG(INFO)<<
"too large symbol: " << symbol_number;
Represents a generic range of positive integers.
Contains the text compression and encoding framework.
constexpr auto bit_r
Global predefined range for bits (0 or 1).
size_type size() const
Returns size of the View.
A literal iterator that yields every character from a View.
void decode(Input &input, Output &output)
Algorithm(Algorithm const &)=default
Env & env()
Provides access to the environment that the algorithm works in.
OutputStream as_stream() const
Creates a stream that allows for character-wise output.
An empty literal iterator that yields no literals whatsoever.
constexpr auto literal_r
Global predefined range for literals.
An abstraction layer for algorithm output.
Defines data encoding to and decoding from a stream of Elias-Gamma codes.
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
void encode(io::InputView &in, Output &output, rules &dictionary, non_terminal_symbols &nts_symbols)
Interface for algorithms.