tudocomp
– The TU Dortmund Compression Framework
TextDS.hpp
Go to the documentation of this file.
1 #pragma once
2 
4 #include <tudocomp/Algorithm.hpp>
6 
8 
9 //Defaults
15 
16 namespace tdc {
17 
18 static_assert(
19  std::is_same<View::value_type, uliteral_t>::value,
20  "View::value_type and uliteral_t must be the same");
21 
23 template<
24  typename sa_t = SADivSufSort,
25  typename phi_t = PhiFromSA,
26  typename plcp_t = PLCPFromPhi,
27  typename lcp_t = LCPFromPLCP,
28  typename isa_t = ISAFromSA
29 >
30 class TextDS : public Algorithm {
31 public:
33  static const dsflags_t SA = ds::SA;
34  static const dsflags_t ISA = ds::ISA;
35  static const dsflags_t LCP = ds::LCP;
36  static const dsflags_t PHI = ds::PHI;
37  static const dsflags_t PLCP = ds::PLCP;
38 
40 
41  using sa_type = sa_t;
42  using phi_type = phi_t;
43  using plcp_type = plcp_t;
44  using lcp_type = lcp_t;
45  using isa_type = isa_t;
46 
49 
50  if (flags & SA) rest |= sa_type::restrictions();
51  if (flags & ISA) rest |= isa_type::restrictions();
52  if (flags & LCP) rest |= lcp_type::restrictions();
53  if (flags & PHI) rest |= phi_type::restrictions();
54  if (flags & PLCP) rest |= plcp_type::restrictions();
55 
56  return rest;
57  };
58 
59 private:
61 
62  View m_text;
63 
64  std::unique_ptr<sa_t> m_sa;
65  std::unique_ptr<phi_t> m_phi;
66  std::unique_ptr<plcp_t> m_plcp;
67  std::unique_ptr<lcp_t> m_lcp;
68  std::unique_ptr<isa_t> m_isa;
69 
70  dsflags_t m_ds_requested;
71  CompressMode m_cm;
72 
73  template<typename ds_t>
74  inline std::unique_ptr<ds_t> construct_ds(const std::string& option, CompressMode cm) {
75  return std::make_unique<ds_t>(
76  env().env_for_option(option),
77  *this,
78  cm_select(cm, m_cm));
79  }
80 
81  template<typename ds_t>
82  inline const ds_t& require_ds(
83  std::unique_ptr<ds_t>& p, const std::string& option, CompressMode cm) {
84 
85  if(!p) p = construct_ds<ds_t>(option, cm_select(cm, m_cm));
86  return *p;
87  }
88 
89  template<typename ds_t>
90  inline void discard_ds(std::unique_ptr<ds_t>& p, dsflags_t flag) {
91  p.reset(nullptr);
92  }
93 
94  template<typename ds_t>
95  inline ds_t release_ds(std::unique_ptr<ds_t>& p, dsflags_t flag, string_ref err_msg) {
96  DCHECK(p) << "TextDS did not contain a " << err_msg;
97  return std::move(*p);
98  }
99 
100  template<typename ds_t>
101  inline typename ds_t::data_type inplace_ds(
102  std::unique_ptr<ds_t>& p, dsflags_t flag, const std::string& option, CompressMode cm) {
103 
104  if(!p) p = construct_ds<ds_t>(option, cm_select(cm, m_cm));
105  if(m_ds_requested & flag) {
106  // data structure is requested, return a copy of the data
107  return p->copy();
108  } else {
109  // relinquish data and discard ds
110  auto data = p->relinquish();
111  p.reset(nullptr);
112  return std::move(data);
113  }
114  }
115 
116 public:
117  inline static Meta meta() {
118  Meta m("textds", "textds");
119  m.option("sa").templated<sa_t, SADivSufSort>("sa");
120  m.option("phi").templated<phi_t, PhiFromSA>("phi");
121  m.option("plcp").templated<plcp_t, PLCPFromPhi>("plcp");
122  m.option("lcp").templated<lcp_t, LCPFromPLCP>("lcp");
123  m.option("isa").templated<isa_t, ISAFromSA>("isa");
124  m.option("compress").dynamic("delayed");
125  return m;
126  }
127 
128  inline TextDS(Env&& env, const View& text)
129  : Algorithm(std::move(env)),
130  m_text(text), m_ds_requested(0) {
131 
132  if(!m_text.ends_with(uint8_t(0))){
133  throw std::logic_error(
134  "Input has no sentinel! Please make sure you declare "
135  "the compressor calling this with "
136  "`m.needs_sentinel_terminator()` in its `meta()` function."
137  );
138  }
139 
140  auto& cm_str = this->env().option("compress").as_string();
141  if(cm_str == "delayed") {
142  m_cm = CompressMode::delayed;
143  } else if(cm_str == "compressed") {
145  } else {
146  m_cm = CompressMode::plain;
147  }
148  }
149 
151  : TextDS(std::move(env), text) {
152 
153  require(flags, cm);
154  }
155 
156  // require methods
157 
158  inline const sa_t& require_sa(CompressMode cm = CompressMode::select) {
159  return require_ds(m_sa, "sa", cm);
160  }
161  inline const phi_t& require_phi(CompressMode cm = CompressMode::select) {
162  return require_ds(m_phi, "phi", cm);
163  }
164  inline const plcp_t& require_plcp(CompressMode cm = CompressMode::select) {
165  return require_ds(m_plcp, "plcp", cm);
166  }
167  inline const lcp_t& require_lcp(CompressMode cm = CompressMode::select) {
168  return require_ds(m_lcp, "lcp", cm);
169  }
170  inline const isa_t& require_isa(CompressMode cm = CompressMode::select) {
171  return require_ds(m_isa, "isa", cm);
172  }
173 
174  // inplace methods
175 
176  inline typename sa_t::data_type inplace_sa(
178 
179  return inplace_ds(m_sa, SA, "sa", cm);
180  }
181  inline typename phi_t::data_type inplace_phi(
183 
184  return inplace_ds(m_phi, PHI, "phi", cm);
185  }
186  inline typename plcp_t::data_type inplace_plcp(
188 
189  return inplace_ds(m_plcp, PLCP, "plcp", cm);
190  }
191  inline typename lcp_t::data_type inplace_lcp(
193 
194  return inplace_ds(m_lcp, LCP, "lcp", cm);
195  }
196  inline typename isa_t::data_type inplace_isa(
198 
199  return inplace_ds(m_isa, ISA, "isa", cm);
200  }
201 
202  // release methods
203 
204  inline sa_t release_sa() {
205  return release_ds(m_sa, SA, "SA");
206  }
207  inline phi_t release_phi() {
208  return release_ds(m_phi, PHI, "PHI");
209  }
210  inline plcp_t release_plcp() {
211  return release_ds(m_plcp, PLCP, "PLCP");
212  }
213  inline lcp_t release_lcp() {
214  return release_ds(m_lcp, LCP, "LCP");
215  }
216  inline isa_t release_isa() {
217  return release_ds(m_isa, ISA, "ISA");
218  }
219 
220 private:
221  inline void discard_sa() {
222  discard_ds(m_sa, SA);
223  }
224  inline void discard_phi() {
225  discard_ds(m_phi, PHI);
226  }
227  inline void discard_plcp() {
228  discard_ds(m_plcp, PLCP);
229  }
230  inline void discard_lcp() {
231  discard_ds(m_lcp, LCP);
232  }
233  inline void discard_isa() {
234  discard_ds(m_isa, ISA);
235  }
236 
237  inline void discard_unneeded() {
238  // discard unrequested structures
239  if(!(m_ds_requested & SA)) discard_sa();
240  if(!(m_ds_requested & PHI)) discard_phi();
241  if(!(m_ds_requested & PLCP)) discard_plcp();
242  if(!(m_ds_requested & LCP)) discard_lcp();
243  if(!(m_ds_requested & ISA)) discard_isa();
244  }
245 
246 public:
248  m_ds_requested = flags;
249 
250  // TODO: we need something like a dependency graph here
251 
252  // construct requested structures
253  cm = cm_select(cm,(m_cm == CompressMode::delayed ?
255 
256 
257  // Construct SA (don't compress yet)
258  if(flags & SA) { require_sa(cm); discard_unneeded(); }
259 
260  // Construct Phi (don't compress yet)
261  if(flags & PHI) { require_phi(cm); discard_unneeded(); }
262 
263  // Construct PLCP and compress if LCP is not requested
264  if(flags & PLCP) {
265  require_plcp(cm);
266  discard_unneeded();
267  if(cm == CompressMode::coherent_delayed && !(flags & LCP)) {
268  m_plcp->compress();
269  }
270  }
271 
272  // Construct and compress LCP
273  if(flags & LCP) {
274  require_lcp(cm);
275  discard_unneeded();
276  if(cm == CompressMode::coherent_delayed) m_lcp->compress();
277  }
278 
279  // Construct and compress ISA
280  if(flags & ISA) {
281  require_isa(cm);
282  discard_unneeded();
283  if(cm == CompressMode::coherent_delayed) m_isa->compress();
284  }
285 
286  // Compress data structures that had dependencies
288  if(m_sa) m_sa->compress();
289  if(m_phi) m_phi->compress();
290  if(m_plcp) m_plcp->compress();
291  }
292  }
293 
295  inline value_type operator[](size_t i) const {
296  return m_text[i];
297  }
298 
300  inline const value_type* text() const {
301  return m_text.data();
302  }
303 
305  inline size_t size() const {
306  return m_text.size();
307  }
308 
309  inline void print(std::ostream& out, size_t base) {
310  size_t w = std::max(8UL, (size_t)std::log10((double)size()) + 1);
311  out << std::setfill(' ');
312 
313  //Heading
314  out << std::setw(w) << "i" << " | ";
315  if(m_sa) out << std::setw(w) << "SA[i]" << " | ";
316  if(m_phi) out << std::setw(w) << "Phi[i]" << " | ";
317  if(m_plcp) out << std::setw(w) << "PLCP[i]" << " | ";
318  if(m_lcp) out << std::setw(w) << "LCP[i]" << " | ";
319  if(m_isa) out << std::setw(w) << "ISA[i]" << " | ";
320  out << std::endl;
321 
322  //Separator
323  out << std::setfill('-');
324  out << std::setw(w) << "" << "-|-";
325  if(m_sa) out << std::setw(w) << "" << "-|-";
326  if(m_phi) out << std::setw(w) << "" << "-|-";
327  if(m_plcp) out << std::setw(w) << "" << "-|-";
328  if(m_lcp) out << std::setw(w) << "" << "-|-";
329  if(m_isa) out << std::setw(w) << "" << "-|-";
330  out << std::endl;
331 
332  //Body
333  out << std::setfill(' ');
334  for(size_t i = 0; i < size(); i++) {
335  out << std::setw(w) << (i + base) << " | ";
336  if(m_sa) out << std::setw(w) << ((*m_sa)[i] + base) << " | ";
337  if(m_phi) out << std::setw(w) << (*m_phi)[i] << " | ";
338  if(m_plcp) out << std::setw(w) << (*m_plcp)[i] << " | ";
339  if(m_lcp) out << std::setw(w) << (*m_lcp)[i] << " | ";
340  if(m_isa) out << std::setw(w) << ((*m_isa)[i] + base) << " | ";
341  out << std::endl;
342  }
343  }
344 };
345 
346 } //ns
ds::dsflags_t dsflags_t
Definition: TextDS.hpp:32
Constructs the suffix array using divsufsort.
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Constructs the inverse suffix array using the suffix array.
Definition: ISAFromSA.hpp:12
Special mode that will cause no bit-compression at all during construction, internal use in TextDS on...
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
const phi_t & require_phi(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:161
size_t size() const
Returns the size of the input text.
Definition: TextDS.hpp:305
bool ends_with(const T &other) const
Returns true if the View ends with the literal other
void print(std::ostream &out, size_t base)
Definition: TextDS.hpp:309
Data structures are constructed directly in bit-compressed space (slower construction, but smaller memory usage).
phi_t release_phi()
Definition: TextDS.hpp:207
static const dsflags_t PHI
Definition: TextDS.hpp:36
lcp_t::data_type inplace_lcp(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:191
Env env_for_option(const std::string &option) const
Create the environment for a sub algorithm option.
Definition: Env.hpp:34
constexpr dsflags_t PHI
Definition: TextDSFlags.hpp:14
const std::string & as_string() const
A const view into a slice of memory.
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
const OptionValue & option(const std::string &option) const
Get an option of this algorithm.
Definition: Env.hpp:44
unsigned int dsflags_t
Definition: TextDSFlags.hpp:8
isa_t release_isa()
Definition: TextDS.hpp:216
static Meta meta()
Definition: TextDS.hpp:117
constexpr dsflags_t ISA
Definition: TextDSFlags.hpp:12
Describes a set of restrictions placed on input data.
static const dsflags_t PLCP
Definition: TextDS.hpp:37
constexpr dsflags_t PLCP
Definition: TextDSFlags.hpp:15
sa_t::data_type inplace_sa(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:176
const lcp_t & require_lcp(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:167
const sa_t & require_sa(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:158
phi_t::data_type inplace_phi(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:181
static const dsflags_t ISA
Definition: TextDS.hpp:34
Constructs the PLCP array using the phi array.
Definition: PLCPFromPhi.hpp:12
size_type size() const
Returns size of the View.
static ds::InputRestrictions common_restrictions(dsflags_t flags)
Definition: TextDS.hpp:47
lcp_t lcp_type
Definition: TextDS.hpp:44
isa_t::data_type inplace_isa(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:196
Constructs the LCP array using the Phi algorithm.
Definition: LCPFromPLCP.hpp:12
TextDS(Env &&env, const View &text)
Definition: TextDS.hpp:128
CompressMode
Defines when data structures are bit-compressed.
Definition: CompressMode.hpp:6
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
const plcp_t & require_plcp(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:164
const value_type * text() const
Provides direct access to the input text.
Definition: TextDS.hpp:300
uliteral_t value_type
Definition: TextDS.hpp:39
Constructs the Phi array using the suffix array.
Definition: PhiFromSA.hpp:12
static const dsflags_t SA
Definition: TextDS.hpp:33
const isa_t & require_isa(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:170
plcp_t plcp_type
Definition: TextDS.hpp:43
phi_t phi_type
Definition: TextDS.hpp:42
CompressMode cm_select(CompressMode in, CompressMode sel)
static const dsflags_t LCP
Definition: TextDS.hpp:35
plcp_t release_plcp()
Definition: TextDS.hpp:210
isa_t isa_type
Definition: TextDS.hpp:45
plcp_t::data_type inplace_plcp(CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:186
sa_t release_sa()
Definition: TextDS.hpp:204
const value_type * data() const noexcept
The backing memory location.
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11
value_type operator[](size_t i) const
Accesses the input text at position i.
Definition: TextDS.hpp:295
void require(dsflags_t flags, CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:247
sa_t sa_type
Definition: TextDS.hpp:41
Automatically select compress mode, internal use in TextDS only.
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
TextDS(Env &&env, const View &text, dsflags_t flags, CompressMode cm=CompressMode::select)
Definition: TextDS.hpp:150
Data structures are bit-compressed after they have been constructed (fast construction, but possibly high memory peak).
constexpr dsflags_t LCP
Definition: TextDSFlags.hpp:13
Local environment for a compression/encoding/decompression call.
Manages text related data structures.
Definition: TextDS.hpp:30
Interface for algorithms.
Definition: Algorithm.hpp:15
lcp_t release_lcp()
Definition: TextDS.hpp:213
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150
Data structures are not bit-compressed at all (fastest, but highest memory usage).
Definition: CompressMode.hpp:9