tudocomp
– The TU Dortmund Compression Framework
MaxLCPStrategy.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/Algorithm.hpp>
4 #include <tudocomp/ds/TextDS.hpp>
5 
8 
10 
11 namespace tdc {
12 namespace lcpcomp {
13 
22 class MaxLCPStrategy : public Algorithm {
23 public:
24  inline static Meta meta() {
25  Meta m("lcpcomp_comp", "max_lcp");
26  return m;
27  }
28 
29  inline static ds::dsflags_t textds_flags() {
30  return ds::SA | ds::ISA | ds::LCP;
31  }
32 
33  using Algorithm::Algorithm; //import constructor
34 
35  template<typename text_t>
36  inline void factorize(text_t& text,
37  size_t threshold,
38  lzss::FactorBuffer& factors) {
39 
40  // Construct SA, ISA and LCP
41  //StatPhase::wrap("Construct text ds", [&]{
42  // text.require(text_t::SA | text_t::ISA | text_t::LCP);
43  //});
44 
45  auto& sa = text.require_sa();
46  auto& isa = text.require_isa();
47 
48  text.require_lcp();
49  auto lcp = text.release_lcp();
50 
51  auto list = StatPhase::wrap("Construct MaxLCPSuffixList", [&]{
53  lcp, threshold, lcp.max_lcp());
54 
55  StatPhase::log("entries", list.size());
56  return list;
57  });
58 
59  //Factorize
60  StatPhase::wrap("Process MaxLCPSuffixList", [&]{
61  while(list.size() > 0) {
62  //get suffix with longest LCP
63  size_t m = list.get_max();
64 
65  //generate factor
66  size_t fpos = sa[m];
67  size_t fsrc = sa[m-1];
68  size_t flen = lcp[m];
69 
70  factors.emplace_back(fpos, fsrc, flen);
71 
72  //remove overlapped entries
73  for(size_t k = 0; k < flen; k++) {
74  size_t i = isa[fpos + k];
75  if(list.contains(i)) {
76  list.remove(i);
77  }
78  }
79 
80  //correct intersecting entries
81  for(size_t k = 0; k < flen && fpos > k; k++) {
82  size_t s = fpos - k - 1;
83  size_t i = isa[s];
84  if(list.contains(i)) {
85  if(s + lcp[i] > fpos) {
86  size_t l = fpos - s;
87  if(l >= threshold) {
88  list.decrease_key(i, l);
89  } else {
90  list.remove(i);
91  }
92  }
93  }
94  }
95  }
96 
97  StatPhase::log("num_factors", factors.size());
98  });
99  }
100 };
101 
102 }}
103 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Implements the original "Max LCP" selection strategy for LCPComp.
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
static ds::dsflags_t textds_flags()
Provides constant-time access to the suffix in a suffix array with the longest correspondig lcp table...
unsigned int dsflags_t
Definition: TextDSFlags.hpp:8
constexpr dsflags_t ISA
Definition: TextDSFlags.hpp:12
Algorithm(Algorithm const &)=default
void emplace_back(len_t fpos, len_t fsrc, len_t flen)
Definition: LZSSFactors.hpp:41
void factorize(text_t &text, size_t threshold, lzss::FactorBuffer &factors)
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
constexpr dsflags_t LCP
Definition: TextDSFlags.hpp:13
Interface for algorithms.
Definition: Algorithm.hpp:15