2 #include <tudocomp/config.h> 13 #include <boost/heap/pairing_heap.hpp> 25 Meta m(
"lcpcomp_comp",
"plcp",
"compressor using PLCP array");
33 template<
typename text_t>
41 const auto& sa = text.require_sa();
45 const auto& sa = text.require_sa();
46 const auto& isa = text.require_isa();
47 const len_t n = sa.size();
55 DCHECK_NE(o.pos, this->pos);
56 if(o.lcp == this->lcp)
return this->pos > o.pos;
57 return this->lcp < o.lcp;
62 boost::heap::pairing_heap<Poi> heap;
63 std::vector<typename boost::heap::pairing_heap<Poi>::handle_type> handles;
70 len_t lastpos_lcp = 0;
71 for(
len_t i = 0; i+1 < n; ++i) {
72 while(pplcp.index() < i) pplcp.advance();
73 const len_t plcp_i = pplcp(); DCHECK_EQ(pplcp.index(), i);
75 if(plcp_i >= threshold) {
76 handles.emplace_back(heap.emplace(i, plcp_i, handles.size()));
82 if(i - lastpos >= lastpos_lcp ||
tdc_unlikely(i+1 == n)) {
84 IF_STATS(max_heap_size = std::max<len_t>(max_heap_size, heap.size()));
85 DCHECK_EQ(heap.size(), handles.size());
86 while(!heap.empty()) {
87 const Poi& top = heap.top();
88 const len_t source_position = sa[isa[top.pos]-1];
90 const len_t next_pos = top.pos;
91 IF_DEBUG(
if(first) DCHECK_EQ(top.pos, lastpos); first =
false;)
94 len_t newlcp_peak = 0;
95 bool peak_exists =
false;
96 if(top.pos+top.lcp < i)
97 for(
len_t j = top.no+1; j < handles.size(); ++j) {
98 if( handles[j].node_ ==
nullptr)
continue;
99 const Poi poi = *(handles[j]);
100 DCHECK_LT(next_pos, poi.pos);
101 if(poi.pos < next_pos+top.lcp) {
102 heap.erase(handles[j]);
103 handles[j].node_ =
nullptr;
104 if(poi.lcp + poi.pos > next_pos+top.lcp) {
105 const len_t remaining_lcp = poi.lcp+poi.pos - (next_pos+top.lcp);
106 DCHECK_NE(remaining_lcp,0);
107 if(newlcp_peak != 0) DCHECK_LE(remaining_lcp, newlcp_peak);
108 newlcp_peak = std::max(remaining_lcp, newlcp_peak);
110 }
else if( poi.pos == next_pos+top.lcp) { peak_exists=
true; }
115 for(
len_t j = top.no+1; j < handles.size(); ++j) {
116 if( handles[j].node_ ==
nullptr)
continue;
117 const Poi& poi = *(handles[j]);
118 if(poi.pos == next_pos+top.lcp) {
119 DCHECK_LE(newlcp_peak, poi.lcp);
125 if(!peak_exists && newlcp_peak >= threshold) {
127 DCHECK(handles[j].node_ ==
nullptr);
128 handles[j] = heap.emplace(next_pos+top.lcp, newlcp_peak, j);
132 handles[top.no].node_ =
nullptr;
135 for(
auto it = handles.rbegin(); it != handles.rend(); ++it) {
136 if( (*it).node_ ==
nullptr)
continue;
138 if(poi.pos > next_pos)
continue;
139 const len_t newlcp = next_pos - poi.pos;
140 if(newlcp < poi.lcp) {
141 if(newlcp < threshold) {
158 DCHECK_EQ(pplcp.index(), i);
159 DCHECK_EQ(plcp_i, pplcp());
160 if(plcp_i <= lastpos_lcp)
continue;
161 DCHECK_LE(threshold, plcp_i);
162 handles.emplace_back(heap.emplace(i,plcp_i, handles.size()));
165 lastpos_lcp = plcp_i;
bool operator<(const ConstGenericView< uliteral_t > &lhs, const ConstGenericView< uliteral_t > &rhs)
Contains the text compression and encoding framework.
#define tdc_unlikely(x)
Provides a hint to the compiler that x is expected to resolve to false.
Algorithm(Algorithm const &)=default
void factorize(text_t &text, size_t threshold, lzss::FactorBuffer &factors)
Env & env()
Provides access to the environment that the algorithm works in.
fast_t< len_compact_t > len_t
Type to represent an length value.
#define IF_STATS(x)
x is compiled only when the STATS_DISABLED macro is undefined.
void emplace_back(len_t fpos, len_t fsrc, len_t flen)
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
static ds::dsflags_t textds_flags()
Interface for algorithms.
sdsl::bit_vector construct_plcp_bitvector(Env &, const sa_t &sa, const text_t &text)
#define IF_DEBUG(x)
x is compiled only in debug builds.