tudocomp
– The TU Dortmund Compression Framework
LZSSFactors.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <algorithm>
4 #include <vector>
5 #include <tudocomp/def.hpp>
6 
9 
10 namespace tdc {
11 namespace lzss {
12 
13 class Factor {
14 public:
16 
17  inline Factor(len_t fpos, len_t fsrc, len_t flen)
18  : pos(fpos), src(fsrc), len(flen) {
19  }
20 } __attribute__((__packed__));
21 
22 
23 class FactorBuffer {
24 private:
25  std::vector<Factor> m_factors;
26  bool m_sorted;
27 
28  len_t m_shortest_factor;
29  len_t m_longest_factor;
30 
31 public:
32  using const_iterator = std::vector<Factor>::const_iterator;
33 
34  inline FactorBuffer()
35  : m_sorted(true)
36  , m_shortest_factor(INDEX_MAX)
37  , m_longest_factor(0)
38  {
39  }
40 
41  inline void emplace_back(len_t fpos, len_t fsrc, len_t flen) {
42  m_sorted = m_sorted && (m_factors.empty() || fpos >= m_factors.back().pos);
43  m_factors.emplace_back(fpos, fsrc, flen);
44 
45  m_shortest_factor = std::min(m_shortest_factor, flen);
46  m_longest_factor = std::max(m_longest_factor, flen);
47  }
48 
49  inline const_iterator begin() const {
50  return m_factors.cbegin();
51  }
52 
53  inline const_iterator end() const {
54  return m_factors.cend();
55  }
56 
57  inline bool empty() const {
58  return m_factors.empty();
59  }
60 
61  inline size_t size() const {
62  return m_factors.size();
63  }
64 
65  inline bool is_sorted() const {
66  return m_sorted;
67  }
68 
69  inline void sort() { //TODO: use radix sort
70  if(!m_sorted) {
71  std::sort(m_factors.begin(), m_factors.end(),
72  [](const Factor& a, const Factor& b) -> bool { return a.pos < b.pos; });
73 
74  m_sorted = true;
75  }
76  }
77 
78 public:
79  inline void flatten() {
80  if(m_factors.empty()) return; //nothing to do
81 
82  CHECK(m_sorted)
83  << "factors need to be sorted before they can be flattened";
84 
85  // create pos -> factor map
86  auto& last = m_factors.back();
87  DynamicIntVector fmap(
88  last.pos + last.len,
89  0,
90  bits_for(m_factors.size() + 1));
91 
92  for(size_t i = 0; i < m_factors.size(); i++) {
93  auto& f = m_factors[i];
94  for(size_t j = 0; j < f.len; j++) {
95  fmap[f.pos + j] = i + 1;
96  }
97  }
98 
99  // process factors
100  size_t num_flattened = 0;
101  size_t max_depth = 0;
102  for(auto& f : m_factors) {
103  size_t depth = 0;
104 
105  size_t src = f.src;
106  while(src < fmap.size() && fmap[src]) {
107  auto& s = m_factors[fmap[src] - 1];
108 
109  size_t d = src - s.pos;
110  if((s.src + d + f.len) <= (s.src + s.len)) {
111  src = s.src + d;
112 
113  //FIXME: actually, one would have to add the flatten
114  // depth of the referred factors recursively,
115  // so the depth here is only a lower bound
116  ++depth;
117  } else {
118  break;
119  }
120  }
121 
122  if(depth) {
123  f.src = src;
124 
125  ++num_flattened;
126  max_depth = std::max(max_depth, depth);
127  }
128  }
129 
130  StatPhase::log("num_flattened", num_flattened);
131  StatPhase::log("max_depth_lb", max_depth);
132  }
133 
134  inline size_t shortest_factor() const {
135  return m_shortest_factor;
136  }
137 
138  inline size_t longest_factor() const {
139  return m_longest_factor;
140  }
141 
142 };
143 
144 }} //ns
145 
len_compact_t len
Definition: LZSSFactors.hpp:15
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
Factor(len_t fpos, len_t fsrc, len_t flen)
Definition: LZSSFactors.hpp:17
len_compact_t pos
Definition: LZSSFactors.hpp:15
class tdc::lzss::FactorBuffer __attribute__
len_compact_t src
Definition: LZSSFactors.hpp:15
size_t shortest_factor() const
uint32_t len_compact_t
Type to represent an bit-compact length value.
Definition: def.hpp:103
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
size_t longest_factor() const
void emplace_back(len_t fpos, len_t fsrc, len_t flen)
Definition: LZSSFactors.hpp:41
const_iterator end() const
Definition: LZSSFactors.hpp:53
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
constexpr size_t INDEX_MAX
The maximum value of len_compact_t.
Definition: def.hpp:117
std::vector< Factor >::const_iterator const_iterator
Definition: LZSSFactors.hpp:32
size_type size() const
Definition: IntVector.hpp:284
const_iterator begin() const
Definition: LZSSFactors.hpp:49