tudocomp
– The TU Dortmund Compression Framework
BulldozerStrategy.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <vector>
4 
5 #include <tudocomp/Algorithm.hpp>
7 #include <tudocomp/ds/TextDS.hpp>
8 
10 
12 
13 namespace tdc {
14 namespace lcpcomp {
15 
19 class BulldozerStrategy : public Algorithm {
20 private:
21  struct Interval {
22  size_t p, q, l;
23  };
24 
25  struct IntervalComparator {
26  bool operator()(const Interval& a, const Interval& b) {
27  if(a.p != b.p) {
28  return a.p < b.p;
29  } else {
30  return a.l > b.l;
31  }
32  }
33  };
34 
35 public:
36  using Algorithm::Algorithm; //import constructor
37 
38  inline static Meta meta() {
39  Meta m("lcpcomp_comp", "bulldozer");
40  return m;
41  }
42 
43  inline static ds::dsflags_t textds_flags() {
44  return ds::SA | ds::ISA | ds::LCP;
45  }
46 
47  template<typename text_t>
48  inline void factorize(text_t& text,
49  size_t threshold,
50  lzss::FactorBuffer& factors) {
51 
52  // Construct SA, ISA and LCP
53  StatPhase::wrap("Construct text ds", [&]{
54  text.require(text_t::SA | text_t::ISA | text_t::LCP);
55  });
56 
57  auto& sa = text.require_sa();
58  auto& lcp = text.require_lcp();
59 
60  //
61  size_t n = sa.size();
62 
63  //induce intervals
64  std::vector<Interval> intervals;
65  StatPhase::wrap("Induce intervals", [&]{
66  std::vector<Interval> intervals;
67  for(size_t i = 1; i < sa.size(); i++) {
68  if(lcp[i] >= threshold) {
69  intervals.emplace_back(sa[i], sa[i-1], lcp[i]);
70  intervals.emplace_back(sa[i-1], sa[i], lcp[i]);
71  }
72  }
73 
74  StatPhase::log("numIntervals", intervals.size());
75  });
76 
77  //sort
78  StatPhase::wrap("Sort intervals", [&]{
79  std::sort(intervals.begin(), intervals.end(), IntervalComparator());
80  });
81 
82  //debug output
83  /*DLOG(INFO) << "Intervals:";
84  for(auto it = intervals.begin(); it != intervals.end(); ++it) {
85  DLOG(INFO) << "[" << (it->p+1) << ", " << (it->q+1) << ", " << it->l << "]";
86  }*/
87 
88  //marker
89  StatPhase::wrap("Process Intervals", [&]{
90  BitVector marked(n);
91 
92  auto x = intervals.begin();
93  while(x != intervals.end()) {
94  if(!marked[x->q]) {
95  //find maximum amount of consecutive unmarked positions
96  size_t l = 1;
97  while(l < x->l && x->q + l < n && !marked[x->q + l]) {
98  ++l;
99  }
100 
101  if(l >= threshold) {
102  factors.emplace_back(x->p, x->q, l);
103 
104  //mark source positions as "unreplaceable"
105  for(size_t k = 0; k < l; k++) {
106  marked[x->p + k] = 1;
107  }
108 
109  //jump to next available interval
110  size_t p = x->p;
111  do {
112  ++x;
113  } while(x != intervals.end() && x->p < p + l);
114 
115  continue;
116  }
117  }
118 
119  ++x;
120  }
121  });
122  }
123 };
124 
125 }}
126 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
void factorize(text_t &text, size_t threshold, lzss::FactorBuffer &factors)
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
unsigned int dsflags_t
Definition: TextDSFlags.hpp:8
constexpr dsflags_t ISA
Definition: TextDSFlags.hpp:12
Algorithm(Algorithm const &)=default
static ds::dsflags_t textds_flags()
Implements the "Bulldozer" selection strategy for LCPComp.
void emplace_back(len_t fpos, len_t fsrc, len_t flen)
Definition: LZSSFactors.hpp:41
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
constexpr dsflags_t LCP
Definition: TextDSFlags.hpp:13
Interface for algorithms.
Definition: Algorithm.hpp:15