tudocomp
– The TU Dortmund Compression Framework
BoostHeap.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/Algorithm.hpp>
4 #include <tudocomp/ds/TextDS.hpp>
6 
8 #include <boost/heap/pairing_heap.hpp>
9 
11 
12 namespace tdc {
13 namespace lcpcomp {
14 
15 
24 class BoostHeap : public Algorithm {
25 public:
26  inline static Meta meta() {
27  Meta m("lcpcomp_comp", "bheap", "boost heaps");
28  return m;
29  }
30 
31  inline static ds::dsflags_t textds_flags() {
32  return ds::SA | ds::ISA | ds::LCP;
33  }
34 
35  using Algorithm::Algorithm; //import constructor
36 
37  template<typename text_t>
38  inline void factorize(text_t& text, const size_t threshold, lzss::FactorBuffer& factors) {
39 
40  // Construct SA, ISA and LCP
41  StatPhase::wrap("Construct text ds", [&]{
42  text.require(text_t::SA | text_t::ISA | text_t::LCP);
43  });
44 
45  auto& sa = text.require_sa();
46  auto& isa = text.require_isa();
47 
48  text.require_lcp();
49  auto lcp = text.release_lcp();
50 
51  struct LCPCompare {
52  using lcp_t = decltype(lcp);
53  using sa_t = decltype(sa); //`text_t::isa_type;
54  lcp_t& m_lcp;
55  sa_t& m_sa;
56  LCPCompare(lcp_t& lcp_, sa_t& sa_) : m_lcp(lcp_), m_sa(sa_) {}
57 
58  bool operator()(const len_t i, const len_t j) const {
59  if(m_lcp[i] == m_lcp[j]) return m_sa[i] > m_sa[j];
60  return m_lcp[i] < m_lcp[j];
61  }
62 
63  };
64 
65  LCPCompare comp(lcp,sa);
66 
67  StatPhase phase("Construct MaxLCPHeap");
68 
69  boost::heap::pairing_heap<len_compact_t,boost::heap::compare<LCPCompare>> heap(comp);
70  std::vector<typename decltype(heap)::handle_type> handles(lcp.size());
71 
72  handles[0].node_ = nullptr;
73  for(size_t i = 1; i < lcp.size(); ++i) {
74  if(lcp[i] >= threshold) handles[i] = heap.emplace(i);
75  else handles[i].node_ = nullptr;
76  }
77 
78  //Factorize
79  phase.split("Process MaxLCPHeap");
80 
81  while(heap.size() > 0) {
82  //get suffix with longest LCP
83  const len_compact_t& m = heap.top();
84 
85  //generate factor
86  const len_t fpos = sa[m];
87  const len_t fsrc = sa[m-1];
88  const len_t flen = lcp[m];
89 
90  factors.emplace_back(fpos, fsrc, flen);
91 
92  //remove overlapped entries
93  for(size_t k = 0; k < flen; k++) {
94  const len_t pos = isa[fpos + k];
95  if(handles[pos].node_ == nullptr) continue;
96  heap.erase(handles[pos]);
97  handles[pos].node_ = nullptr;
98  }
99 
100  //correct intersecting entries
101  for(size_t k = 0; k < flen && fpos > k; k++) {
102  size_t s = fpos - k - 1;
103  size_t i = isa[s];
104  if(handles[i].node_ != nullptr) {
105  if(s + lcp[i] > fpos) {
106  size_t l = fpos - s;
107  if(l >= threshold) {
108  lcp[i] = l;
109  heap.decrease(handles[i]);
110  } else {
111  heap.erase(handles[i]);
112  handles[i].node_ = nullptr;
113  }
114  }
115  }
116  }
117  }
118  }
119 };
120 
121 }}
122 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Implements the original "Max LCP" selection strategy for LCPComp.
Definition: BoostHeap.hpp:24
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
void factorize(text_t &text, const size_t threshold, lzss::FactorBuffer &factors)
Definition: BoostHeap.hpp:38
unsigned int dsflags_t
Definition: TextDSFlags.hpp:8
constexpr dsflags_t ISA
Definition: TextDSFlags.hpp:12
Provides access to runtime and memory measurement in statistics phases.
Definition: StatPhase.hpp:44
void split(const char *new_title)
Starts a new phase as a sibling, reusing the same object.
Definition: StatPhase.hpp:264
Algorithm(Algorithm const &)=default
static ds::dsflags_t textds_flags()
Definition: BoostHeap.hpp:31
uint32_t len_compact_t
Type to represent an bit-compact length value.
Definition: def.hpp:103
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
len_compact_t pos
Definition: LZSSFactors.hpp:38
void emplace_back(len_t fpos, len_t fsrc, len_t flen)
Definition: LZSSFactors.hpp:41
static Meta meta()
Definition: BoostHeap.hpp:26
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
constexpr dsflags_t LCP
Definition: TextDSFlags.hpp:13
Interface for algorithms.
Definition: Algorithm.hpp:15