tudocomp
– The TU Dortmund Compression Framework
BWTCompressor.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/util.hpp>
5 #include <tudocomp/ds/bwt.hpp>
6 #include <tudocomp/ds/TextDS.hpp>
7 #include <tudocomp/util.hpp>
8 
10 
11 namespace tdc {
12 
13 template<typename text_t = TextDS<>>
14 class BWTCompressor : public Compressor {
15 
16 private:
17 // const TypeRange<len_t> len_r = TypeRange<len_t>();
18 
19 public:
20  inline static Meta meta() {
21  Meta m("compressor", "bwt", "BWT Compressor");
22  m.option("textds").templated<text_t, TextDS<>>("textds");
23  m.uses_textds<text_t>(ds::SA);
24  return m;
25  }
26 
28 
29  inline virtual void compress(Input& input, Output& output) override {
30  auto ostream = output.as_stream();
31  auto in = input.as_view();
32  DCHECK(in.ends_with(uint8_t(0)));
33 
34  text_t t(env().env_for_option("textds"), in, text_t::SA);
35  DVLOG(2) << vec_to_debug_string(t);
36  const len_t input_size = t.size();
37 
38  StatPhase::wrap("Construct Text DS", [&]{
39  t.require(text_t::SA);
40  DVLOG(2) << vec_to_debug_string(t.require_sa());
41  });
42 
43  const auto& sa = t.require_sa();
44  for(size_t i = 0; i < input_size; ++i) {
45  ostream << bwt::bwt(t,sa,i);
46  }
47  }
48 
49  inline virtual void decompress(Input& input, Output& output) override {
50  auto in = input.as_view();
51  auto ostream = output.as_stream();
52 
53  auto decoded_string = StatPhase::wrap("Decode BWT", [&]{
54  return bwt::decode_bwt(in);
55  });
56 
57  if(tdc_unlikely(decoded_string.empty())) {
58  return;
59  }
60 
61  StatPhase::wrap("Output Text", [&]{
62  ostream << decoded_string << '\0';
63  });
64  }
65 };
66 
67 }//ns
68 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
std::string decode_bwt(const bwt_t &bwt)
Decodes a BWT. It is assumed that the BWT is stored in a container with access to operator[] and ...
Definition: bwt.hpp:77
std::string vec_to_debug_string(const T &s, size_t indent=0)
Builds the string representation of a vector of byte values, surrounded by square brackets ([ and ])...
#define tdc_unlikely(x)
Provides a hint to the compiler that x is expected to resolve to false.
Definition: def.hpp:23
Base for data compressors.
Definition: Compressor.hpp:19
text_t::value_type bwt(const text_t &text, const sa_t &sa, const size_t i)
Computes the value BWT[i] of a text T given its suffix array SA. Runs in O(1) time since BWT[i] = SA[(...
Definition: bwt.hpp:20
void uses_textds(ds::dsflags_t flags)
Indicates that this Algorithm uses the TextDS class, and how it does.
Definition: Meta.hpp:277
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
OutputStream as_stream() const
Creates a stream that allows for character-wise output.
virtual void compress(Input &input, Output &output) override
Compress the given input to the given output.
An abstraction layer for algorithm output.
Definition: Output.hpp:23
fast_t< len_compact_t > len_t
Type to represent a length value.
Definition: def.hpp:114
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
virtual void decompress(Input &input, Output &output) override
Decompress the given input to the given output.
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
Compressor(Compressor const &)=default
InputView as_view() const
Provides a view on the input that allows for random access.
Definition: Input.hpp:260
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
static Meta meta()
Manages text related data structures.
Definition: TextDS.hpp:30
An abstraction layer for algorithm input.
Definition: Input.hpp:37