tudocomp
– The TU Dortmund Compression Framework
InputAlloc.hpp
Go to the documentation of this file.
1 #pragma once
2 
4 
6 namespace tdc {namespace io {
13 
// Forward declaration so the aliases below can name the chunk type before
// the class itself is defined.
14  class InputAllocChunk;
// Shared-ownership handle to a single chunk.
15  using InputAllocChunkHandle = std::shared_ptr<InputAllocChunk>;
// The registry type: a vector of chunk handles.
16  using InputAlloc = std::vector<InputAllocChunkHandle>;
17 
23  class InputAllocChunk {
24  size_t m_from;
25  size_t m_to;
26  std::weak_ptr<InputAlloc> m_alloc;
27  public:
28  inline InputAllocChunk(size_t from, size_t to, std::weak_ptr<InputAlloc> alloc):
29  m_from(from), m_to(to), m_alloc(alloc) {
30  DCHECK(m_alloc.lock());
31  }
32  inline size_t from() const {
33  return m_from;
34  }
35  inline size_t to() const {
36  return m_to;
37  }
38  inline std::weak_ptr<InputAlloc> alloc() const {
39  return m_alloc;
40  }
41 
42  inline virtual ~InputAllocChunk() {}
43  virtual const InputRestrictions& restrictions() const = 0;
44  virtual const InputSource& source() const = 0;
45  virtual View view() const = 0;
46  virtual RestrictedBuffer unwrap() && = 0;
47  virtual void debug_print_content() const = 0;
48  };
49 
51  class InputAllocChunkOwned: public InputAllocChunk {
52  RestrictedBuffer m_buffer;
53 
54  public:
55  InputAllocChunkOwned(RestrictedBuffer&& buffer,
56  size_t from,
57  size_t to,
58  std::weak_ptr<InputAlloc> alloc):
59  InputAllocChunk(from, to, alloc),
60  m_buffer(std::move(buffer)) {}
61  inline virtual const InputRestrictions& restrictions() const {
62  return m_buffer.restrictions();
63  }
64  inline virtual const InputSource& source() const {
65  return m_buffer.source();
66  }
67  inline virtual View view() const {
68  return m_buffer.view();
69  }
70  inline virtual RestrictedBuffer unwrap() && {
71  return std::move(m_buffer);
72  }
73  inline virtual void debug_print_content() const {
74  DVLOG(2) << " buf: " << m_buffer.view().size() << "\n";
75  }
76  };
77 
80  class InputAllocChunkReferenced: public InputAllocChunk {
81  View m_view;
82  std::shared_ptr<InputAllocChunk> m_parent;
83  public:
84  inline InputAllocChunkReferenced(
85  View view,
86  size_t from,
87  size_t to,
88  const std::shared_ptr<InputAllocChunk>& parent
89  ):
90  InputAllocChunk(from, to, parent->alloc()),
91  m_view(view),
92  m_parent(parent) {}
93  inline virtual const InputRestrictions& restrictions() const {
94  return m_parent->restrictions();
95  }
96  inline virtual const InputSource& source() const {
97  return m_parent->source();
98  }
99  inline virtual View view() const {
100  return m_view;
101  }
102  inline virtual RestrictedBuffer unwrap() && {
103  throw std::runtime_error("This is only a view");
104  }
105  inline virtual void debug_print_content() const {
106  DVLOG(2) << " parent: " << std::hex << size_t(&*m_parent) << std::dec << "\n";
107  }
108  };
109 
111  class InputAllocHandle {
112  std::shared_ptr<InputAlloc> m_ptr;
113 
114  inline void debug_print_content() const {
115  auto& vec = *m_ptr;
116 
117  DVLOG(2) << "Alloc registry:\n";
118  for (auto& e : vec) {
119  DCHECK(e);
120  auto& ee = *e;
121  DVLOG(2) << " self: " << std::hex << size_t(&ee) << std::dec << "\n";
122  DVLOG(2) << " kind: " << ee.source() << "\n";
123  DVLOG(2) << " from: " << ee.from() << "\n";
124  if (ee.to() == RestrictedBuffer::npos) {
125  DVLOG(2) << " to: <npos>" << "\n";
126  } else {
127  DVLOG(2) << " to: " << ee.to() << "\n";
128  }
129  DVLOG(2) << " rest: " << ee.restrictions() << "\n";
130  DVLOG(2) << " refs: " << e.use_count() << "\n";
131  ee.debug_print_content();
132  DVLOG(2) << "\n";
133  }
134  }
135 
136  template<typename F>
137  inline InputAllocChunkHandle create_buffer(F f) const {
138  DCHECK(m_ptr);
139  {
140  auto new_alloc = std::make_shared<InputAllocChunkOwned>(f(m_ptr));
141  m_ptr->push_back(new_alloc);
142  }
143 
144  debug_print_content();
145  return m_ptr->back();
146  }
147 
148  inline InputAllocChunkHandle create_ref(InputAllocChunkReferenced&& v) const {
149  {
150  auto new_alloc = std::make_shared<InputAllocChunkReferenced>(std::move(v));
151  m_ptr->push_back(new_alloc);
152  }
153 
154  debug_print_content();
155  return m_ptr->back();
156  }
157 
158  inline InputAllocChunkHandle create_stream(
159  const InputSource& src,
160  size_t from,
161  size_t to,
162  InputRestrictions restrictions,
163  std::vector<InputAllocChunkHandle*>& selection
164  ) const {
165  InputAllocChunkHandle* parent_ptr = nullptr;
166 
167  // Try to find the one full copy of the stream we should maximally have
168  for (auto ch_ptr : selection) {
169  auto& ch = *ch_ptr;
170  if (ch->from() == 0 && ch->to() == RestrictedBuffer::npos) {
171  parent_ptr = ch_ptr;
172  break;
173  }
174  }
175 
176  // If there isn't one yet, create it.
177  if (parent_ptr == nullptr) {
178  create_buffer([&](std::weak_ptr<InputAlloc> ptr) {
179  return InputAllocChunkOwned {
180  RestrictedBuffer(src,
181  0,
182  RestrictedBuffer::npos,
183  restrictions),
184  0,
185  RestrictedBuffer::npos,
186  ptr,
187  };
188  });
189  parent_ptr = &(m_ptr->back());
190  }
191 
192  DVLOG(2) << "After parent create:\n";
193  debug_print_content();
194 
195  // If there is one, but it differs in restrictions,
196  // ensure its unique and change the restrictions
197  auto& parent = *parent_ptr;
198  if (!(parent->restrictions() == restrictions)) {
199  CHECK(parent.unique())
200  << "Attempt to access stream `Input` in a way "
201  << "that would need to create a copy of the data.";
202 
203  auto buf = std::move(*parent).unwrap();
204  parent = std::make_unique<InputAllocChunkOwned>(
205  InputAllocChunkOwned {
206  std::move(buf).change_restrictions(restrictions),
207  0,
208  RestrictedBuffer::npos,
209  m_ptr,
210  }
211  );
212  }
213 
214  DVLOG(2) << "After parent restrict:\n";
215  debug_print_content();
216 
217  // If the whole range is requested, return parent directly
218  if (from == 0 && to == RestrictedBuffer::npos) {
219  return parent;
220  }
221 
222  // Else create a slice into it.
223 
224  // First, calculate offset in escaoed buffer:
225  size_t escaped_from = 0;
226  size_t escaped_to = 0;
227 
228  {
229  if(parent->restrictions().has_no_escape_restrictions()) {
230  escaped_from = from;
231  escaped_to = to;
232  } else {
233  uint8_t escape_byte = EscapeMap(parent->restrictions()).escape_byte();
234  auto v = parent->view();
235 
236  size_t i = 0;
237  size_t unescaped_i = 0;
238 
239  for (; i < v.size(); i++) {
240  if (unescaped_i == from) {
241  escaped_from = i;
242  break;
243  }
244  if (v[i] != escape_byte) {
245  unescaped_i++;
246  }
247  }
248 
249  if (to != RestrictedBuffer::npos) {
250  for (; i < v.size(); i++) {
251  if (unescaped_i == to) {
252  escaped_to = i;
253  break;
254  }
255  if (v[i] != escape_byte) {
256  unescaped_i++;
257  }
258  }
259  } else {
260  escaped_to = RestrictedBuffer::npos;
261  }
262  }
263 
264  if (restrictions.null_terminate()) {
265  CHECK(escaped_to == RestrictedBuffer::npos)
266  << "Can not yet slice while adding a null terminator";
267  }
268  }
269 
270  DVLOG(2) << "After ref create:\n";
271  return create_ref(InputAllocChunkReferenced {
272  parent->view().slice(escaped_from, escaped_to),
273  from,
274  to,
275  parent
276  });
277  }
278 
279  inline void cleanup_empty() {
280  auto& vec = *m_ptr;
281 
282  InputAlloc new_vec;
283 
284  for (auto p : vec) {
285  if (p) {
286  new_vec.push_back(p);
287  }
288  }
289 
290  vec = new_vec;
291  }
292 
293  inline void remove(InputAllocChunkHandle handle) {
294  for (auto& ptr : *m_ptr) {
295  if (ptr == handle) {
296  ptr.reset();
297  }
298  }
299  cleanup_empty();
300  }
301  public:
303  inline InputAllocChunkHandle find_or_construct(
304  const InputSource& src,
305  size_t from,
306  size_t to,
307  InputRestrictions restrictions) const
308  {
309  auto pred = [&](const InputSource& e) -> bool {
310  return e == src;
311  };
312 
313  std::vector<InputAllocChunkHandle*> selection;
314  for (auto& eptr : *m_ptr) {
315  if (eptr) {
316  auto& e = *eptr;
317  if (pred(e.source())) {
318  selection.push_back(&eptr);
319  }
320  }
321  }
322 
323  // Check for exact matches in the cache
324  for (auto& eptr : selection) {
325  auto& iac = **eptr;
326  auto& e = iac;
327  if (e.from() == from && e.to() == to && e.restrictions() == restrictions) {
328  return *eptr;
329  }
330  }
331 
332  // Else we need to allocate a buffer:
333  if (src.is_stream()) {
334  // Streams source are complicated,
335  // because wen need to rember the first allocation rather
336  // that creating them anew as needed
337 
338  return create_stream(src, from, to, restrictions, selection);
339  } else {
340  // File or View sources can be created arbitrarily:
341 
342  return create_buffer([&](std::weak_ptr<InputAlloc> ptr) {
343  return InputAllocChunkOwned {
344  RestrictedBuffer(src, from, to, restrictions),
345  from,
346  to,
347  ptr,
348  };
349  });
350  }
351  }
352 
353  inline InputAllocHandle(): m_ptr(std::make_shared<InputAlloc>()) {}
354  inline InputAllocHandle(std::weak_ptr<InputAlloc> weak) {
355  DCHECK(!weak.expired());
356  m_ptr = weak.lock();
357  DCHECK(m_ptr);
358  }
359 
360  friend void unregister_alloc_chunk_handle(InputAllocChunkHandle handle);
361  };
362 
368  inline void unregister_alloc_chunk_handle(InputAllocChunkHandle handle) {
369  if (handle) {
370  // If its a stream root handle then we keep it
371 
372  if (handle->source().is_stream()) {
373  if (handle->from() == 0 && handle->to() == RestrictedBuffer::npos) {
374  return;
375  }
376  }
377 
378  auto tmp = InputAllocHandle(handle->alloc());
379  tmp.remove(handle);
380  }
381  }
382 }}
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
len_compact_t src
Definition: LZSSFactors.hpp:38
ByteView View
Definition: View.hpp:25