tudocomp
– The TU Dortmund Compression Framework
CompressedLCP.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <type_traits>
4 
7 #include <tudocomp/ds/Select.hpp>
8 
10 
11 namespace tdc {
12 
15 template<typename sa_t>
16 class CompressedLCP : public Algorithm {
17 public:
19  using data_type = iv_t;
20 
21 private:
22  const sa_t* m_sa;
23 
24  len_t m_size;
25  len_t m_max;
26 
27  BitVector m_lcp;
28  Select1 m_select;
29 
30 public:
31  inline static Meta meta() {
32  Meta m("lcp", "compressed_lcp");
33  m.option("sa").templated<sa_t>("sa");
34  return m;
35  }
36 
38  return ds::InputRestrictions {};
39  }
40 
41 private:
42  inline void encode_unary(len_t x) {
43  while(x) {
44  --x;
45  m_lcp.emplace_back(0);
46  }
47  m_lcp.emplace_back(1);
48  }
49 
50 public:
51  template<typename textds_t>
52  inline CompressedLCP(Env&& env, textds_t& tds, CompressMode cm)
53  : Algorithm(std::move(env)) {
54 
55  // Suffix Array types must match
56  static_assert(std::is_same<sa_t, typename textds_t::sa_type>(),
57  "Suffix Array type mismatch!");
58 
59  // Require Suffix and PLCP Array
60  m_sa = &tds.require_sa(cm);
61  auto& plcp = tds.require_plcp(cm);
62 
63  m_size = plcp.size();
64  m_max = plcp.max_lcp();
65 
66  // Construct
67  StatPhase::wrap("Construct compressed LCP Array", [&]{
68  encode_unary(plcp[0] + 1);
69  for(size_t i = 1; i < m_size; i++) {
70  encode_unary(plcp[i] - plcp[i-1] + 1);
71  }
72 
73  m_select = Select1(m_lcp);
74  });
75  }
76 
77  inline len_t max_lcp() const {
78  return m_max;
79  }
80 
81  inline len_t operator[](len_t i) const {
82  const len_t j = (*m_sa)[i];
83  return m_select(j+1) - 2*j - 1;
84  }
85 
86  inline void compress() {
87  // nothing to do, already succinct :-)
88  }
89 
90  inline size_t size() const {
91  return m_size;
92  }
93 
94  inline iv_t relinquish() {
95  throw std::runtime_error("not supported"); //TODO: what to do?
96  }
97 
98  inline iv_t copy() const {
99  throw std::runtime_error("not supported"); //TODO: what to do?
100  }
101 };
102 
103 }
len_t operator[](len_t i) const
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
CompressedLCP(Env &&env, textds_t &tds, CompressMode cm)
static ds::InputRestrictions restrictions()
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
Constructs the LCP array from the Suffix and PLCP arrays, storing it in a manner as described by Fisc...
Select< 1 > Select1
Definition: Select.hpp:251
Describes a set of restrictions placed on input data.
len_t max_lcp() const
size_t size() const
void emplace_back(Args &&... args)
Definition: IntVector.hpp:481
CompressMode
Defines when data structures are bit-compressed.
Definition: CompressMode.hpp:6
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
static Meta meta()
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
IntVector< dynamic_t > DynamicIntVector
Represents an integer vector with unspecified (dynamic) bit width.
Definition: IntVector.hpp:553
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
Local environment for a compression/encoding/decompression call.
Interface for algorithms.
Definition: Algorithm.hpp:15
DynamicIntVector iv_t