tudocomp
– The TU Dortmund Compression Framework
BitPackingVector.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <algorithm>
4 #include <cmath>
5 #include <cstddef>
6 #include <cstring>
7 #include <fstream>
8 #include <iomanip>
9 #include <iostream>
10 #include <iterator>
11 #include <memory>
12 #include <sstream>
13 #include <string>
14 #include <type_traits>
15 #include <utility>
16 #include <climits>
17 
18 #include <tudocomp/ds/IntRepr.hpp>
19 #include <tudocomp/ds/IntPtr.hpp>
20 #include <tudocomp/ds/uint_t.hpp>
23 
24 #include <sdsl/bits.hpp>
25 #include <glog/logging.h>
26 
28 
29 namespace tdc {
30 namespace int_vector {
31  template<class T>
32  struct BitPackingVector: IntRepr<T>::BitPackingVectorRepr {
33  typedef typename IntRepr<T>::value_type value_type;
34 
35  typedef IntRef<T> reference;
36  typedef ConstIntRef<T> const_reference;
37 
38  typedef IntPtr<T> pointer;
39  typedef ConstIntPtr<T> const_pointer;
40 
41  typedef pointer iterator;
42  typedef const_pointer const_iterator;
43 
44  typedef typename std::reverse_iterator<iterator> reverse_iterator;
45  typedef typename std::reverse_iterator<const_iterator> const_reverse_iterator;
46 
47  typedef ptrdiff_t difference_type;
48  typedef size_t size_type;
49 
50  typedef typename IntRepr<T>::BitPackingVectorRepr::internal_data_type internal_data_type;
51 
52  template<class M>
53  friend bool operator==(const BitPackingVector<M>& lhs, const BitPackingVector<M>& rhs);
54 
55  inline static uint64_t backing2bits_w(size_t n) {
56  return uint64_t(sizeof(internal_data_type) * CHAR_BIT) * uint64_t(n);
57  }
58  inline uint64_t backing2bits(size_t n) const {
59  return backing2bits_w(n);
60  }
61 
62  inline static uint64_t elem2bits_w(size_t n, uint8_t w) {
63  return uint64_t(w) * uint64_t(n);
64  }
65  inline uint64_t elem2bits(size_t n) const {
66  return elem2bits_w(n, this->width());
67  }
68 
69  inline static uint64_t bits2backing_w(uint64_t bits) {
70  if (bits == 0) {
71  return 0;
72  }
73  return ((bits - 1) / backing2bits_w(1)) + 1;
74  }
75  inline uint64_t bits2backing(uint64_t bits) const {
76  return bits2backing_w(bits);
77  }
78 
79  inline static uint64_t bits2elem_w(uint64_t bits, uint8_t w) {
80  if (bits == 0) {
81  return 0;
82  }
83  return ((bits - 1) / elem2bits_w(1, w)) + 1;
84  }
85  inline uint64_t bits2elem(uint64_t bits) const {
86  return bits2elem_w(bits, this->width());
87  }
88 
89  struct PosAndOffset { size_t pos; uint8_t offset; };
90  inline static PosAndOffset bitpos2backingpos_w(uint64_t bits) {
91  return PosAndOffset {
92  bits / backing2bits_w(1),
93  uint8_t(bits % backing2bits_w(1))
94  };
95  }
96  inline PosAndOffset bitpos2backingpos(uint64_t bits) const {
97  return bitpos2backingpos_w(bits);
98  }
99 
100  inline explicit BitPackingVector(): IntRepr<T>::BitPackingVectorRepr() {}
101  inline explicit BitPackingVector(size_type n): BitPackingVector() {
102  this->m_real_size = n;
103  size_t converted_size = bits2backing(elem2bits(this->m_real_size));
104  this->m_vec = std::vector<internal_data_type>(converted_size);
105  DCHECK_EQ(converted_size, this->m_vec.capacity());
106  }
107  inline BitPackingVector(size_type n, const value_type& val): BitPackingVector(n) {
108  auto ptr = this->m_vec.data();
109  uint8_t offset = 0;
110 
111  for (size_t i = 0; i < n; i++) {
112  bits::write_int_and_move(ptr, val, offset, this->width());
113  }
114  }
115 
116  inline BitPackingVector(size_type n, const value_type& val, uint8_t width):
117  BitPackingVector()
118  {
119  this->set_width_raw(width);
120  this->reserve(n);
121  this->resize(n, val);
122  DCHECK_EQ(this->m_vec.size(), bits2backing(elem2bits(n)));
123  DCHECK_EQ(this->m_vec.size(), this->m_vec.capacity());
124  }
125 
126  template <class InputIterator>
127  inline BitPackingVector(InputIterator first, InputIterator last) {
128  // TODO: specialize for random access iterator
129  for(; first != last; first++) {
130  push_back(*first);
131  }
132  }
133  inline BitPackingVector (const BitPackingVector& other):
134  IntRepr<T>::BitPackingVectorRepr(other) {}
135  inline BitPackingVector (BitPackingVector&& other):
136  IntRepr<T>::BitPackingVectorRepr(std::move(other)) {}
137  inline BitPackingVector(std::initializer_list<value_type> il):
138  BitPackingVector(il.begin(), il.end()) {}
139 
140  inline BitPackingVector& operator=(const BitPackingVector& other) {
141  this->m_vec = other.m_vec;
142  this->m_real_size = other.m_real_size;
143  this->set_width_raw(other.width());
144  return *this;
145  }
146 
147  inline BitPackingVector& operator=(BitPackingVector&& other) {
148  this->m_vec = std::move(other.m_vec);
149  this->m_real_size = other.m_real_size;
150  this->set_width_raw(other.width());
151  return *this;
152  }
153 
154  inline BitPackingVector& operator=(std::initializer_list<value_type> il) {
155  *this = BitPackingVector(il);
156  return *this;
157  }
158 
159  inline iterator begin() {
160  using Base = typename int_vector::IntPtrBase<pointer>;
161  auto x = bitpos2backingpos(0);
162  return pointer(Base(&this->m_vec[x.pos], x.offset, this->width()));
163  }
164 
165  inline iterator end() {
166  using Base = typename int_vector::IntPtrBase<pointer>;
167  auto x = bitpos2backingpos(elem2bits(this->m_real_size));
168  return pointer(Base(&this->m_vec[x.pos], x.offset, this->width()));
169  }
170 
171  inline reverse_iterator rbegin() {
172  return std::reverse_iterator<iterator>(end());
173  }
174 
175  inline reverse_iterator rend() {
176  return std::reverse_iterator<iterator>(begin());
177  }
178 
179  inline const_iterator begin() const {
180  using Base = typename int_vector::IntPtrBase<const_pointer>;
181  auto x = bitpos2backingpos(0);
182  return const_pointer(Base(&this->m_vec[x.pos], x.offset, this->width()));
183  }
184 
185  inline const_iterator end() const {
186  using Base = typename int_vector::IntPtrBase<const_pointer>;
187  auto x = bitpos2backingpos(elem2bits(this->m_real_size));
188  return const_pointer(Base(&this->m_vec[x.pos], x.offset, this->width()));
189  }
190 
191  inline const_reverse_iterator rbegin() const {
192  return std::reverse_iterator<const_iterator>(end());
193  }
194 
195  inline const_reverse_iterator rend() const {
196  return std::reverse_iterator<const_iterator>(begin());
197  }
198 
199  inline const_iterator cbegin() const {
200  return begin();
201  }
202 
203  inline const_iterator cend() const {
204  return end();
205  }
206 
207  inline const_reverse_iterator crbegin() const {
208  return rbegin();
209  }
210 
211  inline const_reverse_iterator crend() const {
212  return rend();
213  }
214 
215  inline size_type size() const {
216  return this->m_real_size;
217  }
218 
219  inline uint64_t bit_size() const {
220  return elem2bits(this->m_real_size);
221  }
222 
223  inline size_type max_size() const {
224  // Empty vector does not allocate, so this is fine
225  return std::vector<value_type>().max_size();
226  }
227 
228  inline void resize(size_type n) {
229  this->m_real_size = n;
230  this->m_vec.resize(bits2backing(elem2bits(n)), 0);
231  }
232 
233  inline void resize(size_type n, const value_type& val) {
234  auto old_size = size();
235  resize(n);
236  if (old_size < n) {
237  for (auto a = begin() + old_size, b = end(); a != b; ++a) {
238  *a = val;
239  }
240  }
241  }
242 
243  inline size_type capacity() const {
244  return bits2elem(backing2bits(this->m_vec.capacity()));
245  }
246 
247  inline bool empty() const {
248  return size() == 0;
249  }
250 
251  inline void reserve(size_type n) {
252  this->m_vec.reserve(bits2backing(elem2bits(n)));
253  }
254 
255  inline void shrink_to_fit() {
256  this->m_vec.shrink_to_fit();
257  }
258 
259  inline reference operator[](size_type n) {
260  using Base = typename int_vector::IntPtrBase<pointer>;
261  DCHECK_LT(n, size());
262  auto x = bitpos2backingpos(elem2bits(n));
263  return reference(pointer(Base(this->m_vec.data() + x.pos, x.offset, this->width())));
264  }
265 
266  inline const_reference operator[](size_type n) const {
267  using Base = typename int_vector::IntPtrBase<const_pointer>;
268  DCHECK(n < size());
269  auto x = bitpos2backingpos(elem2bits(n));
270  return const_reference(const_pointer(Base(this->m_vec.data() + x.pos, x.offset, this->width())));
271  }
272 
273  inline void range_check(size_type n) const {
274  if (n >= size()) {
275  std::stringstream ss;
276  ss << "Out-of-range access of IntVector: index is ";
277  ss << n;
278  ss << ", size() is ";
279  ss << size();
280  throw std::out_of_range(ss.str());
281  }
282  }
283 
284  inline reference at(size_type n) {
285  range_check(n);
286  return operator[](n);
287  }
288 
289  inline const_reference at(size_type n) const {
290  range_check(n);
291  return operator[](n);
292  }
293 
294  inline reference front() {
295  return operator[](0);
296  }
297 
298  inline const_reference front() const {
299  return operator[](0);
300  }
301 
302  inline reference back() {
303  return operator[](size() - 1);
304  }
305 
306  inline const_reference back() const {
307  return operator[](size() - 1);
308  }
309 
310  inline internal_data_type* data() noexcept {
311  return this->m_vec.data();
312  }
313 
314  inline const internal_data_type* data() const noexcept {
315  return this->m_vec.data();
316  }
317 
318  template <class InputIterator>
319  inline void assign(InputIterator first, InputIterator last) {
320  *this = BitPackingVector(first, last);
321  }
322 
323  inline void assign(size_type n, const value_type& val) {
324  *this = BitPackingVector(n, val);
325  }
326 
327  inline void assign(std::initializer_list<value_type> il) {
328  *this = BitPackingVector(il);
329  }
330 
331  inline void push_back(const value_type& val) {
332  this->m_real_size += 1;
333 
334  while (elem2bits(this->m_real_size) > backing2bits(this->m_vec.size())) {
335  this->m_vec.push_back(0);
336  }
337 
338  back() = val;
339  }
340 
341  inline void push_back(value_type&& val) {
342  const auto& r = val;
343  push_back(r);
344  }
345 
346  inline void pop_back() {
347  DCHECK(!empty());
348  this->m_real_size -= 1;
349  while (bits2backing(elem2bits(this->m_real_size)) < this->m_vec.size()) {
350  this->m_vec.pop_back();
351  }
352  }
353 
354  inline iterator insert(const_iterator position, const value_type& val) {
355  return insert(position, size_type(1), val);
356  }
357 
358  inline iterator insert(const_iterator position, size_type n, const value_type& val) {
359  // Remember element offset before iterator invalidation
360  auto p = (position - cbegin());
361 
362  // Step 1: Grow backing vector by needed amount
363  {
364  auto new_bits_needed = elem2bits(n);
365  auto existing_extra_bits = backing2bits(this->m_vec.size()) - elem2bits(this->m_real_size);
366 
367  if (new_bits_needed > existing_extra_bits) {
368  new_bits_needed -= existing_extra_bits;
369  } else {
370  new_bits_needed = 0;
371  }
372 
373  auto new_backing_needed = bits2backing(new_bits_needed);
374  this->m_vec.insert(this->m_vec.cend(), new_backing_needed, 0);
375  this->m_real_size += n;
376  }
377 
378  // Step 2: move elements to back, leaving a gap
379  {
380  auto start = begin() + p;
381  auto old_end = end() - n;
382  auto new_end = end();
383 
384  // NB: failed at using std::reverse_copy() for this,
385  // so writing it manually
386  while(start != old_end) {
387  --new_end;
388  --old_end;
389  *new_end = *old_end;
390  }
391  }
392 
393  // Step 3: insert new values at gap
394  {
395  for(auto a = begin() + p, b = begin() + p + n; a != b; ++a) {
396  *a = val;
397  }
398 
399  return begin() + p;
400  }
401  }
402 
403  template <class InputIterator>
404  inline iterator insert(const_iterator position, InputIterator first, InputIterator last) {
405  // TODO: Optimize for random access iterators (multiple insertions at once)
406 
407  // Remember integer offset before iterator invalidation
408  const auto p = (position - cbegin());
409  auto pi = p;
410 
411  for(; first != last; ++first) {
412  insert(cbegin() + pi, value_type(*first));
413  pi++;
414  }
415 
416  return begin() + p;
417  }
418 
419  inline iterator insert(const_iterator position, value_type&& val) {
420  const auto& v = val;
421  return insert(position, v);
422  }
423 
424  inline iterator insert(const_iterator position, std::initializer_list<value_type> il) {
425  return insert(position, il.begin(), il.end());
426  }
427 
428  inline iterator erase(const_iterator position) {
429  return erase(position, position + 1);
430  }
431 
432  inline iterator erase(const_iterator first, const_iterator last) {
433  auto from = (first - cbegin());
434  auto to = (last - cbegin());
435  auto n = to - from;
436  std::copy(begin() + to, end(), begin() + from);
437 
438  this->m_real_size -= n;
439 
440  auto obsolete_backing = this->m_vec.size() - bits2backing(elem2bits(this->m_real_size));
441  this->m_vec.erase(this->m_vec.cend() - obsolete_backing, this->m_vec.cend());
442  return begin() + from;
443  }
444 
445  inline void swap(BitPackingVector& other) {
446  this->m_vec.swap(other.m_vec);
447  std::swap(this->m_real_size, other.m_real_size);
448  auto a = this->width();
449  auto b = other.width();
450  this->set_width_raw(b);
451  other.set_width_raw(a);
452  }
453 
454  inline void clear() {
455  this->m_vec.clear();
456  this->m_real_size = 0;
457  }
458 
459  template <class... Args>
460  inline iterator emplace(const_iterator position, Args&&... args) {
461  return insert(position, value_type(std::forward<Args...>(args)...));
462  }
463 
464  template <class... Args>
465  inline void emplace_back(Args&&... args) {
466  push_back(value_type(std::forward<Args...>(args)...));
467  }
468 
469  inline uint8_t width() const {
470  return this->raw_width();
471  }
472 
473  inline void width(uint8_t w) {
474  this->bit_reserve(this->size() * w);
475  this->resize(this->size(), 0, w);
476  }
477 
478  inline void resize(size_type n, const value_type& val, uint8_t w) {
479  auto old_width = this->width();
480  auto new_width = w;
481  auto old_size = this->size();
482  auto new_size = n;
483 
484  auto new_bit_size = elem2bits_w(new_size, new_width);
485  auto common_size = std::min(old_size, new_size);
486 
487  if (old_width < new_width) {
488  // grow
489 
490  // Read from position of last element in the old width grid,
491  // and write to position of last element in the new width grid
492  auto old_p = bitpos2backingpos_w(elem2bits_w(common_size, old_width));
493  auto new_p = bitpos2backingpos_w(elem2bits_w(common_size, new_width));
494 
495  // make room for new bits, reallocating as needed
496  this->m_vec.resize(bits2backing_w(new_bit_size));
497  this->set_width_raw(w);
498  this->m_real_size = new_size;
499 
500  uint64_t* old_ptr = this->m_vec.data() + old_p.pos;
501  uint64_t* new_ptr = this->m_vec.data() + new_p.pos;
502 
503  // move elements into new width grid
504  for (uint64_t i = 0; i < common_size; i++) {
505  sdsl::bits::move_left((const uint64_t*&) old_ptr, old_p.offset, old_width);
506  auto v = sdsl::bits::read_int( old_ptr, old_p.offset, old_width);
507 
508  sdsl::bits::move_left((const uint64_t*&) new_ptr, new_p.offset, new_width);
509  sdsl::bits::write_int( new_ptr, v, new_p.offset, new_width);
510  }
511  } else if (old_width > new_width) {
512  // shrink
513 
514  uint64_t* old_ptr = this->m_vec.data();
515  uint64_t* new_ptr = this->m_vec.data();
516 
517  uint8_t old_offset = 0;
518  uint8_t new_offset = 0;
519 
520  // move elements into new width grid
521  for (uint64_t i = 0; i < common_size; i++) {
522  auto v = sdsl::bits::read_int_and_move((const uint64_t*&) old_ptr, old_offset, old_width);
523  sdsl::bits::write_int_and_move(new_ptr, v, new_offset, new_width);
524  }
525 
526  // remove extra bits, dropping as needed
527  this->m_vec.resize(bits2backing_w(new_bit_size));
528  this->set_width_raw(w);
529  this->m_real_size = new_size;
530  }
531 
532  // initialize new elements correctly
533  if (old_size < new_size) {
534  auto a = this->begin() + old_size;
535  auto b = this->end();
536  for(; a != b; ++a) {
537  *a = val;
538  }
539  }
540  }
541 
542  inline void bit_reserve(uint64_t n) {
543  this->m_vec.reserve(bits2backing(n));
544  }
545 
546  inline void reserve(uint64_t n, uint8_t w) {
547  this->bit_reserve(n * w);
548  }
549  };
550 
551  template<class N>
552  bool operator==(const BitPackingVector<N>& lhs, const BitPackingVector<N>& rhs) {
553  DCHECK(lhs.width() == rhs.width());
554  if (lhs.size() == rhs.size()) {
555  auto extra_bits = lhs.backing2bits(lhs.m_vec.size())
556  - lhs.elem2bits(lhs.m_real_size);
557 
558  if (extra_bits == 0) {
559  return lhs.m_vec == rhs.m_vec;
560  } else {
561  if (!std::equal(lhs.m_vec.cbegin(), lhs.m_vec.cend() - 1, rhs.m_vec.cbegin())) {
562  return false;
563  }
564  auto occupied_bits = lhs.backing2bits(1) - extra_bits;
565  // NB: Underflow can not happen here because there are never
566  // completely unoccupied backing integers
567  auto elements = (occupied_bits - 1) / lhs.elem2bits(1) + 1;
568 
569  return std::equal(lhs.cend() - elements, lhs.cend(), rhs.cend() - elements);
570  }
571  }
572  return false;
573  }
574 
575  template<class N>
576  bool operator!=(const BitPackingVector<N>& lhs, const BitPackingVector<N>& rhs) {
577  return !(lhs == rhs);
578  }
579 
580  template<class N>
581  bool operator<(const BitPackingVector<N>& lhs, const BitPackingVector<N>& rhs) {
582  return std::lexicographical_compare(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend());
583  }
584 
585  template<class N>
586  bool operator<=(const BitPackingVector<N>& lhs, const BitPackingVector<N>& rhs) {
587  return !(lhs > rhs);
588  }
589 
590  template<class N>
591  bool operator>(const BitPackingVector<N>& lhs, const BitPackingVector<N>& rhs) {
592  return std::lexicographical_compare(rhs.cbegin(), rhs.cend(), lhs.cbegin(), lhs.cend());
593  }
594 
595  template<class N>
596  bool operator>=(const BitPackingVector<N>& lhs, const BitPackingVector<N>& rhs) {
597  return !(lhs < rhs);
598  }
599 
600  template<class N>
601  void swap(BitPackingVector<N>& x, BitPackingVector<N>& y) {
602  x.swap(y);
603  }
604 }
605 
606 template<class T>
607 using BitPackingVector = int_vector::BitPackingVector<T>;
608 
609 }
610 
612 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
uint_impl_t & operator=(const uint_impl_t &b)
Definition: uint_t.hpp:43
bool operator>(const IntVector< T > &lhs, const IntVector< T > &rhs)
Definition: IntVector.hpp:522
bool operator==(const IntVector< T > &lhs, const IntVector< T > &rhs)
Definition: IntVector.hpp:502
void swap(IntVector< T > &lhs, IntVector< T > &rhs)
Definition: IntVector.hpp:532
len_compact_t pos
Definition: LZSSFactors.hpp:38
bool operator>=(const IntVector< T > &lhs, const IntVector< T > &rhs)
Definition: IntVector.hpp:527
bool operator!=(const IntVector< T > &lhs, const IntVector< T > &rhs)
Definition: IntVector.hpp:507