tudocomp
– The TU Dortmund Compression Framework
ISAFromSA.hpp
Go to the documentation of this file.
1 #pragma once
2 
6 
8 
9 namespace tdc {
10 
12 class ISAFromSA: public Algorithm, public ArrayDS {
13 public:
14  inline static Meta meta() {
15  Meta m("isa", "from_sa");
16  return m;
17  }
18 
20  return ds::InputRestrictions {};
21  }
22 
23  template<typename textds_t>
24  inline ISAFromSA(Env&& env, textds_t& t, CompressMode cm)
25  : Algorithm(std::move(env)) {
26 
27  // Require Suffix Array
28  auto& sa = t.require_sa(cm);
29 
30  StatPhase::wrap("Construct ISA", [&]{
31  // Allocate
32  const size_t n = t.size();
33  const size_t w = bits_for(n);
35 
36  // Construct
37  for(len_t i = 0; i < n; i++) {
38  (*this)[sa[i]] = i;
39  }
40 
41  StatPhase::log("bit_width", size_t(width()));
42  StatPhase::log("size", bit_size() / 8);
43  });
44 
45  if(cm == CompressMode::delayed) compress();
46  }
47 
48  void compress() {
49  debug_check_array_is_initialized();
50 
51  StatPhase::wrap("Compress ISA", [this]{
52  width(bits_for(size()));
53  shrink_to_fit();
54 
55  StatPhase::log("bit_width", size_t(width()));
56  StatPhase::log("size", bit_size() / 8);
57  });
58  }
59 };
60 
61 } //ns
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
Constructs the inverse suffix array using the suffix array.
Definition: ISAFromSA.hpp:12
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
DynamicIntVector iv_t
The type of integer array to use as storage.
Definition: ArrayDS.hpp:12
Data structures are constructed directly in bit-compressed space (slower construction, but smaller memory usage).
static ds::InputRestrictions restrictions()
Definition: ISAFromSA.hpp:19
Describes a set of restrictions placed on input data.
Base for data structures that use an integer array as a storage.
Definition: ArrayDS.hpp:9
CompressMode
Defines when data structures are bit-compressed.
Definition: CompressMode.hpp:6
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
static Meta meta()
Definition: ISAFromSA.hpp:14
ISAFromSA(Env &&env, textds_t &t, CompressMode cm)
Definition: ISAFromSA.hpp:24
void set_array(iv_t &&iv)
Definition: ArrayDS.hpp:26
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
void compress()
Definition: ISAFromSA.hpp:48
uint64_t bit_size() const
Definition: IntVector.hpp:288
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
constexpr size_t INDEX_FAST_BITS
The amount of bits required to store the binary representation of a value of type len_t...
Definition: def.hpp:128
Data structures are bit-compressed after they have been constructed (fast construction, but possibly high memory peak).
Local environment for a compression/encoding/decompression call.
size_type size() const
Definition: IntVector.hpp:284
Interface for algorithms.
Definition: Algorithm.hpp:15