19 std::is_same<View::value_type, uliteral_t>::value,
20 "View::value_type and uliteral_t must be the same");
24 typename sa_t = SADivSufSort,
25 typename phi_t = PhiFromSA,
26 typename plcp_t = PLCPFromPhi,
27 typename lcp_t = LCPFromPLCP,
28 typename isa_t = ISAFromSA
50 if (flags & SA) rest |= sa_type::restrictions();
51 if (flags & ISA) rest |= isa_type::restrictions();
52 if (flags & LCP) rest |= lcp_type::restrictions();
53 if (flags & PHI) rest |= phi_type::restrictions();
54 if (flags & PLCP) rest |= plcp_type::restrictions();
64 std::unique_ptr<sa_t> m_sa;
65 std::unique_ptr<phi_t> m_phi;
66 std::unique_ptr<plcp_t> m_plcp;
67 std::unique_ptr<lcp_t> m_lcp;
68 std::unique_ptr<isa_t> m_isa;
73 template<
typename ds_t>
74 inline std::unique_ptr<ds_t> construct_ds(
const std::string& option,
CompressMode cm) {
75 return std::make_unique<ds_t>(
81 template<
typename ds_t>
82 inline const ds_t& require_ds(
83 std::unique_ptr<ds_t>& p,
const std::string& option,
CompressMode cm) {
85 if(!p) p = construct_ds<ds_t>(option,
cm_select(cm, m_cm));
89 template<
typename ds_t>
90 inline void discard_ds(std::unique_ptr<ds_t>& p,
dsflags_t flag) {
94 template<
typename ds_t>
96 DCHECK(p) <<
"TextDS did not contain a " << err_msg;
100 template<
typename ds_t>
101 inline typename ds_t::data_type inplace_ds(
104 if(!p) p = construct_ds<ds_t>(option,
cm_select(cm, m_cm));
105 if(m_ds_requested & flag) {
110 auto data = p->relinquish();
112 return std::move(data);
118 Meta m(
"textds",
"textds");
130 m_text(text), m_ds_requested(0) {
133 throw std::logic_error(
134 "Input has no sentinel! Please make sure you declare " 135 "the compressor calling this with " 136 "`m.needs_sentinel_terminator()` in its `meta()` function." 141 if(cm_str ==
"delayed") {
143 }
else if(cm_str ==
"compressed") {
159 return require_ds(m_sa,
"sa", cm);
162 return require_ds(m_phi,
"phi", cm);
165 return require_ds(m_plcp,
"plcp", cm);
168 return require_ds(m_lcp,
"lcp", cm);
171 return require_ds(m_isa,
"isa", cm);
179 return inplace_ds(m_sa, SA,
"sa", cm);
184 return inplace_ds(m_phi, PHI,
"phi", cm);
189 return inplace_ds(m_plcp, PLCP,
"plcp", cm);
194 return inplace_ds(m_lcp, LCP,
"lcp", cm);
199 return inplace_ds(m_isa, ISA,
"isa", cm);
205 return release_ds(m_sa, SA,
"SA");
208 return release_ds(m_phi, PHI,
"PHI");
211 return release_ds(m_plcp, PLCP,
"PLCP");
214 return release_ds(m_lcp, LCP,
"LCP");
217 return release_ds(m_isa, ISA,
"ISA");
221 inline void discard_sa() {
222 discard_ds(m_sa, SA);
224 inline void discard_phi() {
225 discard_ds(m_phi, PHI);
227 inline void discard_plcp() {
228 discard_ds(m_plcp, PLCP);
230 inline void discard_lcp() {
231 discard_ds(m_lcp, LCP);
233 inline void discard_isa() {
234 discard_ds(m_isa, ISA);
237 inline void discard_unneeded() {
239 if(!(m_ds_requested & SA)) discard_sa();
240 if(!(m_ds_requested & PHI)) discard_phi();
241 if(!(m_ds_requested & PLCP)) discard_plcp();
242 if(!(m_ds_requested & LCP)) discard_lcp();
243 if(!(m_ds_requested & ISA)) discard_isa();
248 m_ds_requested = flags;
258 if(flags & SA) {
require_sa(cm); discard_unneeded(); }
261 if(flags & PHI) {
require_phi(cm); discard_unneeded(); }
288 if(m_sa) m_sa->compress();
289 if(m_phi) m_phi->compress();
290 if(m_plcp) m_plcp->compress();
301 return m_text.
data();
306 return m_text.
size();
309 inline void print(std::ostream& out,
size_t base) {
310 size_t w = std::max(8UL, (
size_t)std::log10((
double)
size()) + 1);
311 out << std::setfill(
' ');
314 out << std::setw(w) <<
"i" <<
" | ";
315 if(m_sa) out << std::setw(w) <<
"SA[i]" <<
" | ";
316 if(m_phi) out << std::setw(w) <<
"Phi[i]" <<
" | ";
317 if(m_plcp) out << std::setw(w) <<
"PLCP[i]" <<
" | ";
318 if(m_lcp) out << std::setw(w) <<
"LCP[i]" <<
" | ";
319 if(m_isa) out << std::setw(w) <<
"ISA[i]" <<
" | ";
323 out << std::setfill(
'-');
324 out << std::setw(w) <<
"" <<
"-|-";
325 if(m_sa) out << std::setw(w) <<
"" <<
"-|-";
326 if(m_phi) out << std::setw(w) <<
"" <<
"-|-";
327 if(m_plcp) out << std::setw(w) <<
"" <<
"-|-";
328 if(m_lcp) out << std::setw(w) <<
"" <<
"-|-";
329 if(m_isa) out << std::setw(w) <<
"" <<
"-|-";
333 out << std::setfill(
' ');
334 for(
size_t i = 0; i <
size(); i++) {
335 out << std::setw(w) << (i + base) <<
" | ";
336 if(m_sa) out << std::setw(w) << ((*m_sa)[i] + base) <<
" | ";
337 if(m_phi) out << std::setw(w) << (*m_phi)[i] <<
" | ";
338 if(m_plcp) out << std::setw(w) << (*m_plcp)[i] <<
" | ";
339 if(m_lcp) out << std::setw(w) << (*m_lcp)[i] <<
" | ";
340 if(m_isa) out << std::setw(w) << ((*m_isa)[i] + base) <<
" | ";
Constructs the suffix array using divsufsort.
Contains the text compression and encoding framework.
Constructs the inverse suffix array using the suffix array.
Special mode that will cause no bit-compression at all during construction, internal use in TextDS on...
const phi_t & require_phi(CompressMode cm=CompressMode::select)
size_t size() const
Returns the size of the input text.
bool ends_with(const T &other) const
Returns true if the View ends with the literal other
void print(std::ostream &out, size_t base)
Data structures are constructed directly in bit-compressed space (slower construction, but smaller memory usage).
static const dsflags_t PHI
lcp_t::data_type inplace_lcp(CompressMode cm=CompressMode::select)
Env env_for_option(const std::string &option) const
Create the environment for a sub algorithm option.
const std::string & as_string() const
A const view into a slice of memory.
uint8_t uliteral_t
Type to represent signed single literals.
const OptionValue & option(const std::string &option) const
Get an option of this algorithm.
static const dsflags_t PLCP
sa_t::data_type inplace_sa(CompressMode cm=CompressMode::select)
const lcp_t & require_lcp(CompressMode cm=CompressMode::select)
const sa_t & require_sa(CompressMode cm=CompressMode::select)
phi_t::data_type inplace_phi(CompressMode cm=CompressMode::select)
static const dsflags_t ISA
Constructs the PLCP array using the phi array.
size_type size() const
Returns size of the View.
static ds::InputRestrictions common_restrictions(dsflags_t flags)
isa_t::data_type inplace_isa(CompressMode cm=CompressMode::select)
Constructs the LCP array using the Phi algorithm.
TextDS(Env &&env, const View &text)
CompressMode
Defines when data structures are bit-compressed.
Env & env()
Provides access to the environment that the algorithm works in.
const plcp_t & require_plcp(CompressMode cm=CompressMode::select)
const value_type * text() const
Provides direct access to the input text.
Constructs the Phi array using the suffix array.
static const dsflags_t SA
const isa_t & require_isa(CompressMode cm=CompressMode::select)
CompressMode cm_select(CompressMode in, CompressMode sel)
static const dsflags_t LCP
plcp_t::data_type inplace_plcp(CompressMode cm=CompressMode::select)
const value_type * data() const noexcept
The backing memory location.
value_type operator[](size_t i) const
Accesses the input text at position i.
void require(dsflags_t flags, CompressMode cm=CompressMode::select)
Automatically select compress mode, internal use in TextDS only.
TextDS(Env &&env, const View &text, dsflags_t flags, CompressMode cm=CompressMode::select)
Data structures are bit-compressed after they have been constructed (fast construction, but possibly high memory peak).
Local environment for a compression/encoding/decompression call.
Manages text related data structures.
Interface for algorithms.
Data structures are not bit-compressed at all (fastest, but highest memory usage).