tudocomp
– The TU Dortmund Compression Framework
wt_pc.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * include/wt_prefix_counting.hpp
3  *
4  * Copyright (C) 2017 Florian Kurpicz <florian.kurpicz@tu-dortmund.de>
5  *
6  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
7  ******************************************************************************/
8 
9 #pragma once
10 #ifndef WT_PREFIX_COUNTING
11 #define WT_PREFIX_COUNTING
12 
13 #include <cstring>
14 #include <vector>
15 
17 
18 template <typename AlphabetType, typename SizeType>
19 class wt_pc {
20 
21 public:
22  template<typename text_t>
23  wt_pc(const text_t& text, const SizeType size,
24  const SizeType levels) : _bv(levels) {
25 
26  SizeType cur_max_char = (1 << levels);
27  std::vector<SizeType> s_pos(cur_max_char, 0);
28  std::vector<SizeType> hist(cur_max_char, 0);
29  std::vector<SizeType> borders(cur_max_char, 0);
30 
31  _bv[0] = new uint64_t[(size + 63ULL) >> 6];
32  memset(_bv[0], 0, ((size + 63ULL) >> 6) * sizeof(uint64_t)); // memset is ok (all to 0)
33 
34  SizeType cur_pos = 0;
35  for (; cur_pos + 64 <= size; cur_pos += 64) {
36  uint64_t word = 0ULL;
37  for (SizeType i = 0; i < 64; ++i) {
38  ++hist[text[cur_pos + i]];
39  word <<= 1;
40  word |= ((text[cur_pos + i] >> (levels - 1)) & 1ULL);
41  }
42  _bv[0][cur_pos >> 6] = word;
43  }
44  if (size & 63ULL) {
45  uint64_t word = 0ULL;
46  for (SizeType i = 0; i < size - cur_pos; ++i) {
47  ++hist[text[cur_pos + i]];
48  word <<= 1;
49  word |= ((text[cur_pos + i] >> (levels - 1)) & 1ULL);
50  }
51  word <<= (64 - (size & 63ULL));
52  _bv[0][size >> 6] = word;
53  }
54 
55  for (SizeType level = levels - 1; level > 0; --level) {
56  const SizeType prefix_shift = (levels - level);
57  const SizeType cur_bit_shift = prefix_shift - 1;
58 
59  _bv[level] = new uint64_t[(size + 63ULL) >> 6];
60  memset(_bv[level], 0, ((size + 63ULL) >> 6) * sizeof(uint64_t)); // memset is ok (all to 0)
61 
62  cur_max_char >>= 1;
63  for (SizeType i = 0; i < cur_max_char; ++i) {
64  hist[i] = hist[i << 1] + hist[(i << 1) + 1];
65  }
66 
67  borders[0] = 0;
68  for (SizeType i = 1; i < cur_max_char; ++i) {
69  borders[i] = borders[i - 1] + hist[i - 1];
70  }
71 
72  for (SizeType i = 0; i < size; ++i) {
73  const SizeType pos = borders[text[i] >> prefix_shift]++;
74  _bv[level][pos >> 6] |= (((text[i] >> cur_bit_shift) & 1ULL)
75  << (63ULL - (pos & 63ULL)));
76  }
77  }
78  }
79 
80  auto get_bv() const {
81  return _bv;
82  }
83 
84 private:
85  std::vector<uint64_t*> _bv;
86 }; // class wt_pc
87 
89 
90 #endif // WT_PREFIX_COUNTING
91 
92 /******************************************************************************/
len_compact_t pos
Definition: LZSSFactors.hpp:38