tudocomp
– The TU Dortmund Compression Framework
include/tudocomp/util.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <memory>
4 #include <algorithm>
5 #include <cmath>
6 #include <cstddef>
7 #include <fstream>
8 #include <iostream>
9 #include <memory>
10 #include <sstream>
11 #include <string>
12 #include <type_traits>
13 #include <utility>
14 #include <iomanip>
15 
16 #include <tudocomp/util/View.hpp>
17 
18 namespace tdc {
19 
21 template<class T>
22 struct char_as_uint_trait {
23  static inline uint64_t conv(const T& t) { return t; }
24 };
25 template<>
26 struct char_as_uint_trait<char> {
27  static inline uint64_t conv(const char& t) { return uint8_t(t); }
28 };
29 
30 template<class T>
31 inline uint64_t char_as_uint(const T& t) {
32  return char_as_uint_trait<T>::conv(t);
33 }
35 
45 template<class T>
46 std::string vec_to_debug_string(const T& s, size_t indent = 0) {
47  std::stringstream ss;
48  ss << "[";
49  if (s.size() > 0) {
50  for (size_t i = 0; i < s.size() - 1; i++) {
51  // crazy cast needed to bring negative char values
52  // into their representation as a unsigned one
53  ss << std::setw(indent) << uint((unsigned char) s[i]) << ", ";
54  }
55  ss << std::setw(indent) << uint((unsigned char) s[s.size() - 1]);
56  }
57  ss << "]";
58  return ss.str();
59 }
60 
70 template<class T>
71 std::string arr_to_debug_string(const T* s, size_t length) {
72  if(length == 0) return "[]";
73  std::stringstream ss;
74  ss << "[";
75  for (size_t i = 0; i < length - 1; ++i) {
76  ss << s[i] << ", ";
77  }
78  ss << s[length - 1];
79  ss << "]";
80  return ss.str();
81 }
82 
89 inline std::string byte_to_nice_ascii_char(uint64_t byte) {
90  std::stringstream out;
91  if (byte >= 32 && byte <= 127) {
92  out << "'" << char(byte) << "'";
93  } else {
94  out << byte;
95  }
96  return out.str();
97 }
98 
109 template<class T>
110 std::string vec_as_lossy_string(const T& s, size_t start = 0,
111  char replacement = '?') {
112  std::stringstream ss;
113  for (size_t i = start; i < s.size(); i++) {
114  if (s[i] < 32 || s[i] > 127) {
115  ss << replacement;
116  } else {
117  ss << char(s[i]);
118  }
119  }
120  return ss.str();
121 }
122 
127 template<typename T>
128 inline std::string to_str(const T& v) {
129  std::stringstream ss;
130  ss << v;
131  return ss.str();
132 }
133 
134 inline void debug_print_uint64_t(uint64_t v) {
135  for(int i = 0; i < 64; i++) {
136  if (i > 0 && i % 8 == 0) {
137  std::cout << " ";
138  }
139  std::cout << int((v >> (63 - i)) & 1);
140  }
141  std::cout << "\n";
142 }
143 
157 inline bool parse_number_until_other(std::istream& inp, char& last, size_t& out) {
158  size_t n = 0;
159  char c;
160  bool more = true;
161  while ((more = bool(inp.get(c)))) {
162  if (c >= '0' && c <= '9') {
163  n *= 10;
164  n += (c - '0');
165  } else {
166  last = c;
167  break;
168  }
169  }
170  out = n;
171  return more;
172 }
173 
175 inline constexpr uint_fast8_t bits_hi(uint64_t x) {
176  return x == 0 ? 0 : 64 - __builtin_clzll(x);
177 }
178 
194 inline constexpr uint_fast8_t bits_for(size_t n) {
195  return n == 0 ? 1U : bits_hi(n);
196 }
197 
204 inline constexpr size_t idiv_ceil(size_t a, size_t b) {
205  return (a / b) + ((a % b) > 0);
206 }
207 
225 inline constexpr uint_fast8_t bytes_for(size_t n) {
226  return idiv_ceil(bits_for(n), 8U);
227 }
228 
241 template<typename int_t> struct msbf;
242 
245 template<> struct msbf<uint8_t> {
248  static constexpr uint8_t pos = 7;
249 };
252 template<> struct msbf<uint16_t> {
255  static constexpr uint8_t pos = 15;
256 };
259 template<> struct msbf<uint32_t> {
262  static constexpr uint8_t pos = 31;
263 };
266 template<> struct msbf<uint64_t> {
269  static constexpr uint8_t pos = 63;
270 };
271 
282 template<class T>
283 std::vector<T> cross(std::vector<std::vector<T>>&& vs,
284  std::function<T(T, T&)> f) {
285  auto remaining = vs;
286  if (remaining.size() == 0) {
287  return {};
288  }
289 
290  std::vector<T> first = std::move(remaining[0]);
291  remaining.erase(remaining.begin());
292 
293  auto next = cross(std::move(remaining), f);
294 
295  if (next.size() == 0) {
296  return first;
297  } else {
298  std::vector<T> r;
299  for (auto& x : first) {
300  for (auto& y : next) {
301  r.push_back(f(x, y));
302  }
303  }
304  return r;
305  }
306 }
307 
312 inline std::vector<std::string> split_lines(const std::string& s) {
313  std::stringstream ss(s);
314  std::string to;
315  std::vector<std::string> ret;
316 
317  while(std::getline(ss,to,'\n')) {
318  ret.push_back(to);
319  }
320 
321  return ret;
322 }
323 
330 inline std::string indent_lines(const std::string& s, size_t indent) {
331  std::stringstream ss(s);
332  std::string to;
333  std::stringstream ret;
334 
335  bool first = true;
336  while(std::getline(ss,to,'\n')){
337  if (!first) {
338  ret << "\n";
339  }
340  ret << std::setw(indent) << "" << to;
341  first = false;
342  }
343 
344  return ret.str();
345 }
346 
354 inline std::string make_table(const std::vector<std::string>& data,
355  size_t cols,
356  bool draw_grid = true) {
357  std::vector<size_t> widths;
358  std::vector<size_t> heights;
359  std::vector<std::vector<std::string>> data_lines;
360 
361  // Gather inidividual cell dimensions
362  for (auto& elem : data) {
363  size_t w = 0;
364  size_t h = 0;
365 
366  data_lines.push_back(split_lines(elem));
367  auto& lines = data_lines[data_lines.size() - 1];
368 
369  for (auto& line : lines) {
370  w = std::max(w, line.size());
371  }
372 
373  h = lines.size();
374 
375 
376  widths.push_back(w);
377  heights.push_back(h);
378  }
379 
380  std::vector<size_t> col_widths(cols, 0);
381  std::vector<size_t> row_heights(data.size() / cols, 0);
382 
383  // Calculate max. common row/column dimensions
384  for (size_t i = 0; i < data.size(); i++) {
385  size_t x = i % cols;
386  size_t y = i / cols;
387 
388  col_widths[x] = std::max(col_widths[x], widths[i]);
389  row_heights[y] = std::max(row_heights[y], heights[i]);
390  }
391 
392  // Adjust lines to match
393  for (size_t i = 0; i < data.size(); i++) {
394  size_t x = i % cols;
395  size_t y = i / cols;
396 
397  while (data_lines[i].size() < row_heights[y]) {
398  data_lines[i].push_back("");
399  }
400 
401  for (auto& line : data_lines[i]) {
402  size_t pad = col_widths[x] - line.size();
403  line.append(pad, ' ');
404  }
405  }
406 
407  std::stringstream ret;
408 
409  // Create formatted table
410 
411  const char ROW = '-';
412  const std::string COL = " | ";
413 
414  size_t total_width = 1;
415  for (size_t i = 0; i < col_widths.size(); i++) {
416  total_width += col_widths[i] + 3;
417  }
418 
419  if (draw_grid) {
420  ret << std::string(total_width, ROW) << "\n";
421  }
422 
423  for (size_t y = 0; y < data.size() / cols; y++) {
424  for (size_t line_i = 0; line_i < row_heights[y]; line_i++ ) {
425  for (size_t x = 0; x < cols; x++) {
426  if (draw_grid && x == 0) {
427  ret << COL.substr(1);
428  }
429  if (draw_grid && x != 0) {
430  ret << COL;
431  }
432  ret << data_lines[x + y * cols][line_i];
433  if (draw_grid && x == cols - 1) {
434  ret << COL.substr(0, 2);
435  }
436  }
437  ret << "\n";
438  }
439  if (draw_grid) {
440  ret << std::string(total_width, ROW) << "\n";
441  }
442  }
443 
444  return ret.str();
445 }
446 
447 #if defined(DEBUG) && defined(PARANOID) //functions that cost more than constant time to check
448 template<class T>
449 void assert_permutation(const T& p, size_t n) {
450  for(size_t i = 0; i < n; ++i)
451  for(size_t j = 0; j < n; ++j)
452  {
453  if(i == j) continue;
454  DCHECK_NE(p[i],p[j]) << "at positions " << i << " and " << j;
455  DCHECK_LT(p[i],n);
456  }
457 }
460 template<class T>
461 void assert_permutation_offset(const T& p, size_t n,size_t offset) {
462  for(size_t i = 0; i < n; ++i)
463  for(size_t j = 0; j < n; ++j)
464  {
465  if(i == j) continue;
466  DCHECK_NE(p[i],p[j]) << "at positions " << i << " and " << j;
467  DCHECK_GE(p[i],offset);
468  DCHECK_LT(p[i],offset+n);
469  }
470 }
471 #else
472 template<class T> inline void assert_permutation(const T&, size_t) {}
473 template<class T> inline void assert_permutation_offset(const T&, size_t,size_t) {}
474 #endif
475 
476 
484 template<class T>
485 constexpr T round_up_div(T x, T y) {
486  return (x + y -1)/y;
487 }
488 
489 /*
490  * Square root by abacus algorithm, Martin Guy @ UKC, June 1985.
491  * From a book on programming abaci by Mr C. Woo.
492  * and from https://en.wikipedia.org/wiki/Methods_of_computing_square_roots
493  */
494 template<class int_t>
495 int_t isqrt(int_t num) {
496  int_t res = 0;
497  int_t bit = 1ULL << (sizeof(int_t)-2); // The second-to-top bit is set: 1 << 30 for 32 bits
498 
499  // "bit" starts at the highest power of four <= the argument.
500  while (bit > num)
501  bit >>= 2;
502 
503  while (bit != 0) {
504  if (num >= res + bit) {
505  num -= res + bit;
506  res = (res >> 1) + bit;
507  }
508  else
509  res >>= 1;
510  bit >>= 2;
511  }
512  return res;
513 }
514 
515 static inline size_t lz78_expected_number_of_remaining_elements(const size_t z, const size_t n, const size_t remaining_characters) {
516  if(remaining_characters*2 < n ) {
517  return (z*remaining_characters) / (n - remaining_characters);
518  }
519  return remaining_characters*3/(bits_for(remaining_characters));
520 }
521 
522 class MoveGuard {
523  bool m_is_not_moved;
524 public:
525  inline MoveGuard(): m_is_not_moved(true) {}
526  inline MoveGuard(const MoveGuard& other) {
527  DCHECK(other.m_is_not_moved) << "Trying to copy a already moved-from MoveGuard";
528  m_is_not_moved = other.m_is_not_moved;
529  }
530  inline MoveGuard(MoveGuard&& other) {
531  DCHECK(other.m_is_not_moved) << "Trying to move a already moved-from MoveGuard";
532  m_is_not_moved = other.m_is_not_moved;
533  other.m_is_not_moved = false;
534  }
535 
536  inline bool is_not_moved() const {
537  return m_is_not_moved;
538  }
539  inline bool is_moved() const {
540  return !m_is_not_moved;
541  }
542  inline operator bool() const {
543  return is_not_moved();
544  }
545  inline bool operator!() const {
546  return is_moved();
547  }
548 };
549 
551 namespace portable_arithmetic_shift {
552  // Source: https://gist.github.com/palotasb/de46414a93ba90fff22bdbd2327ae393
553 
554  template <typename T>
555  constexpr auto builtin_shr(T value, int amount) noexcept
556  -> decltype(value >> amount)
557  {
558  return value >> amount;
559  }
560 
561  template <typename T>
562  struct uses_arithmetic_shift : std::integral_constant<bool, builtin_shr(T(-1), 1) == -1> {};
563 
564  template <typename T = int>
565  constexpr T shift_by_portable(T value, int amount) noexcept
566  {
567  return value < 0 ?
568  amount < 0 ? ~(~value >> -amount) : -(-value << amount) :
569  amount < 0 ? value >> -amount : value << amount;
570  }
571 
572  template <typename T = int>
573  constexpr T shift_by_arithmetic(T value, int amount) noexcept
574  {
575  // Only use with negative T values when the compiler translates right shift to arithmetic shift instructions.
576  return amount < 0 ? value >> -amount : value << amount;
577  }
578 }
580 
581 template <typename T = int>
582 constexpr T shift_by(T value, int amount) noexcept
583 {
584  using namespace portable_arithmetic_shift;
585 
586  return uses_arithmetic_shift<T>::value ? shift_by_arithmetic(value, amount) : shift_by_portable(value, amount);
587 }
588 
589 inline uint64_t zero_or_next_power_of_two(uint64_t x) {
590  --x;
591  x |= x >> 1;
592  x |= x >> 2;
593  x |= x >> 4;
594  x |= x >> 8;
595  x |= x >> 16;
596  x |= x >> 32;
597 
598  return x + 1;
599 }
600 
601 }//ns
int_t isqrt(int_t num)
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
std::string to_str(const T &v)
Represent a value as a string.
constexpr size_t idiv_ceil(size_t a, size_t b)
Performs an integer division with the result rounded up to the next integer.
std::string vec_to_debug_string(const T &s, size_t indent=0)
Builds the string representation of a vector of byte values, sorrounded by square brackets ([ and ])...
void debug_print_uint64_t(uint64_t v)
void assert_permutation(const T &, size_t)
MoveGuard(MoveGuard &&other)
unsigned int uint
Definition: characterhash.h:6
uint64_t zero_or_next_power_of_two(uint64_t x)
std::string make_table(const std::vector< std::string > &data, size_t cols, bool draw_grid=true)
Renders the given dataset into an ASCII table.
constexpr uint_fast8_t bytes_for(size_t n)
Computes the number of bytes needed to store the given integer value.
Yields the position of the most significant bit for the template integer type.
constexpr T round_up_div(T x, T y)
Division with rounding up to the next integer.
void assert_permutation_offset(const T &, size_t, size_t)
constexpr uint_fast8_t bits_hi(uint64_t x)
Computes the highest set bit in an integer variable.
MoveGuard(const MoveGuard &other)
len_compact_t pos
Definition: LZSSFactors.hpp:38
std::string byte_to_nice_ascii_char(uint64_t byte)
Converts a byte value into its ASCII representation sorrounded by single quotes (&#39;) or its string rep...
std::string indent_lines(const std::string &s, size_t indent)
Indents each line of a string (separated by \n) by the specified amount of spaces.
std::string arr_to_debug_string(const T *s, size_t length)
Builds the string representation of an array of printable values, sorrounded by square brackets ([ an...
constexpr T shift_by(T value, int amount) noexcept
std::vector< T > cross(std::vector< std::vector< T >> &&vs, std::function< T(T, T &)> f)
Creates the cross product of a set of elements given a product function.
std::vector< std::string > split_lines(const std::string &s)
Splits the input string into lines (separated by \n).
std::string vec_as_lossy_string(const T &s, size_t start=0, char replacement='?')
Converts a vector of bytes into a readable ASCII string, substituting non-ASCII symbols.
bool parse_number_until_other(std::istream &inp, char &last, size_t &out)
Reads digits from the input stream (0 to 9) until a non-digit character is reached and parses them as...