tudocomp
– The TU Dortmund Compression Framework
SADivSufSort.hpp
Go to the documentation of this file.
1 #pragma once
2 
7 
9 
10 namespace tdc {
11 
13 class SADivSufSort: public Algorithm, public ArrayDS {
14 public:
15  inline static Meta meta() {
16  Meta m("sa", "divsufsort");
17  return m;
18  }
19 
21  return ds::InputRestrictions {
22  { 0 },
23  true
24  };
25  }
26 
27  template<typename textds_t>
28  inline SADivSufSort(Env&& env, const textds_t& t, CompressMode cm)
29  : Algorithm(std::move(env)) {
30 
31  StatPhase::wrap("Construct SA", [&]{
32  // Allocate
33  const size_t n = t.size();
34  const size_t w = bits_for(n);
35 
36  // divsufsort needs one additional bit for signs
37  set_array(iv_t(n, 0, (cm == CompressMode::compressed) ? w + 1 : INDEX_FAST_BITS));
38  //std::cout << w << "\n";
39  //std::cout << INDEX_FAST_BITS << "\n";
40 
41  // Use divsufsort to construct
42  divsufsort(t.text(), (iv_t&) *this, n);
43 
44  StatPhase::log("bit_width", size_t(width()));
45  StatPhase::log("size", bit_size() / 8);
46  });
47 
49  compress();
50  }
51  }
52 
53  void compress() {
54  debug_check_array_is_initialized();
55 
56  StatPhase::wrap("Compress SA", [this]{
57  width(bits_for(size()));
58  shrink_to_fit();
59 
60  StatPhase::log("bit_width", size_t(width()));
61  StatPhase::log("size", bit_size() / 8);
62  });
63  }
64 };
65 
66 } //ns
Constructs the suffix array using divsufsort.
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
DynamicIntVector iv_t
The type of integer array to use as storage.
Definition: ArrayDS.hpp:12
Data structures are constructed directly in bit-compressed space (slower construction, but smaller memory usage).
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
Describes a set of restrictions placed on input data.
static Meta meta()
Base for data structures that use an integer array as a storage.
Definition: ArrayDS.hpp:9
CompressMode
Defines when data structures are bit-compressed.
Definition: CompressMode.hpp:6
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
SADivSufSort(Env &&env, const textds_t &t, CompressMode cm)
static ds::InputRestrictions restrictions()
void set_array(iv_t &&iv)
Definition: ArrayDS.hpp:26
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
uint64_t bit_size() const
Definition: IntVector.hpp:288
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
constexpr size_t INDEX_FAST_BITS
The amount of bits required to store the binary representation of a value of type len_t...
Definition: def.hpp:128
Data structures are bit-compressed after they have been constructed (fast construction, but possibly high memory peak).
Local environment for a compression/encoding/decompression call.
size_type size() const
Definition: IntVector.hpp:284
Interface for algorithms.
Definition: Algorithm.hpp:15