tudocomp
– The TU Dortmund Compression Framework
MaxLCPSuffixList.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/util.hpp>
5 
6 namespace tdc {
7 namespace lcpcomp {
8 
14 template<class lcp_t>
16 
17 private:
18  //data backend
19  lcp_t* m_lcp;
20 
21  //undefined suffix
22  const size_t m_undef; // == m_lcp->size()
23 
24  //linked list
25  size_t m_first; //linked list head (maximum LCP)
26  size_t m_last; //linked list tail (current minimum LCP)
27 
28  DynamicIntVector m_prev, m_next;
29 
30  //LCP index
31  DynamicIntVector m_lcp_index;
32 
33  //Suffix membership list
34  BitVector m_suffix_contained;
35 
36  //Entry counter
37  size_t m_size = 0;
38 
40  inline size_t lookup_lcp_index(size_t lcp) const {
41  //DLOG(INFO) << "lookup_lcp_index(" << lcp << ")";
42  DCHECK_LE(lcp, m_lcp_index.size());
43 
44  size_t result = m_undef;
45  while(lcp > 0 && result == m_undef) {
46  result = m_lcp_index[--lcp];
47  }
48 
49  return result;
50  }
51 
52 public:
54  inline MaxLCPSuffixList(lcp_t& lcp, size_t min_lcp, size_t max_lcp)
55  : m_lcp(&lcp), m_undef(m_lcp->size())
56  {
57  const size_t& n = lcp.size();
58 
59  //Initialize doubly linked list
60  m_first = m_undef;
61  m_last = m_undef;
62  m_prev = DynamicIntVector(n, m_undef, bits_for(m_undef));
63  m_next = DynamicIntVector(n, m_undef, bits_for(m_undef));
64 
65  m_lcp_index = DynamicIntVector(max_lcp, m_undef, bits_for(m_undef));
66 
67  //Initialize suffix reference map
68  m_suffix_contained = BitVector(n, 0);
69 
70  //Construct list
71  for (size_t i = 1; i < n; i++) {
72  if (lcp[i] >= min_lcp) {
73  insert(i);
74  }
75  }
76  }
77 
78 private:
80  inline void insert(size_t i) {
81  DCHECK(i < m_undef && !m_suffix_contained[i]);
82 
83  size_t lcp = (*m_lcp)[i];
84  size_t pos = lookup_lcp_index(lcp);
85  if(pos == m_undef) {
86  //insert at end
87  if(m_last != m_undef) {
88  m_next[m_last] = i;
89  }
90 
91  m_next[i] = m_undef;
92  m_prev[i] = m_last;
93 
94  m_last = i;
95  } else {
96  //insert at position
97  size_t prev = m_prev[pos];
98 
99  m_prev[i] = prev;
100  m_next[i] = pos;
101 
102  if(prev != m_undef) {
103  m_next[prev] = i;
104  } else {
105  DCHECK(pos == m_first);
106  m_first = i;
107  }
108 
109  m_prev[pos] = i;
110  }
111 
112  //update lcp index
113  m_lcp_index[lcp-1] = i;
114 
115  //update first
116  if(m_first == m_undef) {
117  m_first = i;
118  }
119 
120  //update info
121  m_suffix_contained[i] = 1;
122  ++m_size;
123  }
124 
125 public:
127  inline void remove(size_t i) {
128  DCHECK(i < m_undef && m_suffix_contained[i]);
129 
130  //unlink
131  if(m_prev[i] != m_undef) {
132  m_next[m_prev[i]] = m_next[i];
133  } else {
134  DCHECK(i == m_first);
135  m_first = m_next[i];
136  }
137 
138  if(m_next[i] != m_undef) {
139  m_prev[m_next[i]] = m_prev[i];
140  } else {
141  DCHECK(i == m_last);
142  m_last = m_prev[i];
143  }
144 
145  //update LCP index
146  size_t lcp = (*m_lcp)[i];
147  if(m_lcp_index[lcp-1] == i) {
148  size_t k = m_next[i];
149  if (k != m_undef && (*m_lcp)[k] == lcp) {
150  m_lcp_index[lcp-1] = k; //move to next entry with same LCP
151  } else {
152  m_lcp_index[lcp-1] = m_undef; //invalidate
153  }
154  }
155 
156  //update info
157  m_suffix_contained[i] = 0;
158  --m_size;
159  }
160 
162  template<typename T>
163  inline void decrease_key(len_t i, T value) {
164  remove(i);
165  (*m_lcp)[i] = value;
166  insert(i);
167  }
168 
170  inline bool contains(size_t i) const {
171  return m_suffix_contained[i];
172  }
173 
175  inline size_t size() const {
176  return m_size;
177  }
178 
180  inline bool empty() const {
181  return (m_size == 0);
182  }
183 
185  inline size_t get_max() const {
186  DCHECK(m_size > 0);
187  return m_first;
188  }
189 };
190 
191 }} //ns
192 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
Provides constant-time access to the suffix in a suffix array with the longest correspondig lcp table...
MaxLCPSuffixList(lcp_t &lcp, size_t min_lcp, size_t max_lcp)
Constructor.
IntVector< uint_t< 1 > > BitVector
Represents a bit vector, alias for IntVector with a fixed bit width of 1.
Definition: IntVector.hpp:545
void decrease_key(len_t i, T value)
Decrease key on array item with index i.
bool empty() const
Test if list is empty.
size_t size() const
Get number of contained entries.
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
len_compact_t pos
Definition: LZSSFactors.hpp:38
IntVector< dynamic_t > DynamicIntVector
Represents an integer vector with unspecified (dynamic) bit width.
Definition: IntVector.hpp:553
bool contains(size_t i) const
Checks whether or not suffix array entry i is contained in this list.
size_type size() const
Definition: IntVector.hpp:284
size_t get_max() const
Get first item (suffix array index with highest LCP)