15 Meta m(
"hash_function",
"vigna",
"Vigna's splitmix Hasher");
20 x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
21 x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
28 x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
29 x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
37 Meta m(
"hash_function",
"knuth",
"Knuth's splitmix Hasher");
43 return key * 2654435769ULL;
50 Meta m(
"hash_function",
"mixer",
"MixHasher");
55 key = (~key) + (key << 21);
56 key = key ^ (key >> 24);
57 key = (key + (key << 3)) + (key << 8);
58 key = key ^ (key >> 14);
59 key = (key + (key << 2)) + (key << 4);
60 key = key ^ (key >> 28);
61 key = key + (key << 31);
68 Meta m(
"hash_function",
"noop",
"Identity");
82 Meta m(
"hash_manager",
"pow",
"Pow2 Hash Table Sizes");
92 return 1ULL<<bits_hi(std::max<len_t>(hint,3UL));
99 return index & (tablesize-1);
107 Meta m(
"hash_manager",
"direct",
"Direct Hash Table Sizes");
115 return std::max<len_t>(hint,3UL);
126 const __uint128_t v = (
static_cast<uint64_t
>(key + (
static_cast<__uint128_t
>(probe) << 64)/(tablesize))) * static_cast<__uint128_t>(tablesize);
127 const len_t ret = v >> 64;
128 DCHECK_LT(ret, tablesize);
136 Meta m(
"hash_manager",
"noob",
"Classic Modulo Reduction");
144 return std::max<len_t>(hint,3UL);
153 return index % tablesize;
160 Meta m(
"hash_manager",
"prime",
"Prime Hash Table Sizes");
166 return next_prime(hint);
172 return index % tablesize;
179 static const size_t primes[] =
181 5,11,19,37, 53ul, 97ul, 193ul, 389ul,
182 769ul, 1543ul, 3079ul, 6151ul, 12289ul,
183 24593ul, 49157ul, 98317ul, 196613ul, 393241ul,
184 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul,
185 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul,
186 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul,
187 (size_t)8589934583ull,
188 (
size_t)17179869143ull,
189 (size_t)34359738337ull,
190 (
size_t)68719476731ull,
191 (size_t)137438953447ull,
192 (
size_t)274877906899ull,
193 (size_t)549755813881ull,
194 (
size_t)1099511627689ull,
195 (size_t)2199023255531ull,
196 (
size_t)4398046511093ull,
197 (size_t)8796093022151ull,
198 (
size_t)17592186044399ull,
199 (size_t)35184372088777ull,
200 (
size_t)70368744177643ull,
201 (size_t)140737488355213ull,
202 (
size_t)281474976710597ull,
203 (size_t)562949953421231ull,
204 (
size_t)1125899906842597ull,
205 (size_t)2251799813685119ull,
206 (
size_t)4503599627370449ull,
207 (size_t)9007199254740881ull,
208 (
size_t)18014398509481951ull,
209 (size_t)36028797018963913ull,
210 (
size_t)72057594037927931ull,
211 (size_t)144115188075855859ull,
212 (
size_t)288230376151711717ull,
213 (size_t)576460752303423433ull,
214 (
size_t)1152921504606846883ull,
215 (size_t)2305843009213693951ull,
216 (
size_t)4611686018427387847ull,
217 (size_t)9223372036854775783ull,
218 (
size_t)18446744073709551557ull,
220 const size_t* __first = primes;
221 const size_t* __last = primes +
sizeof(primes)/
sizeof(primes[0]);
222 const size_t*
pos = std::lower_bound(__first, __last, __n);
223 return pos == __last ? __last[-1] : *
pos;
231 Meta m(
"hash_prober",
"quad",
"Quadratic Prober");
235 template<
class key_t>
236 static inline void init(
const key_t&) {
244 template<
class SizeManager>
245 static inline len_t get(
const len_t& i,
const len_t&,
const len_t& init,
const len_t&) {
252 Meta m(
"hash_prober",
"gauss",
"Gauss Prober");
256 template<
class key_t>
257 static inline void init(
const key_t&) {
259 template<
class SizeManager>
268 Meta m(
"hash_prober",
"linear",
"Linear Prober");
273 template<
class key_t>
274 static inline void init(
const key_t&) {
276 template<
class SizeManager>
277 static inline len_t get(
const len_t&,
const len_t& tablepos,
const len_t&,
const len_t&) {
282 template<
class SizeManager>
284 static inline len_t get(
const size_t& hash_value,
const len_t& max_value) {
285 return (1+(hash_value%(max_value-1))) | 1;
290 template<
class key_t,
class HashFcn>
295 inline void init(
const key_t& key) {
298 template<
class SizeManager>
299 inline len_t get(
const len_t&,
const len_t& tablepos,
const len_t&,
const len_t& max_value)
const {
315 Meta m(
"hash_roll",
"zbackup",
"ZBackup Rolling Hash");
320 m_val = (m_val << (sizeof(char)*sizeof(key_type))) + m_val + static_cast<key_type>(c);
324 return (m_val+m_len) % (-1);
339 Meta m(
"hash_roll",
"wordpack",
"Wordpacking Rolling Hash");
344 m_val = ((m_val + (m_val << 8)) + c);
355 template <
class Key,
class Value,
358 class EqualKey = std::equal_to<Key>,
370 Meta m(
"hash",
"ctable",
"Custom Hash Table");
383 float m_load_factor = 0.3f;
385 static constexpr
len_t initial_size = 4;
387 size_t m_collisions = 0;
388 size_t m_old_size = 0;
389 size_t m_resizes = 0;
390 size_t m_specialresizes = 0;
395 size_t collisions()
const {
return m_collisions; }
396 void collect_stats(
Env&)
const {
406 void max_load_factor(
float z) {
410 return m_load_factor;
415 HashMap(
Env&,
const size_t n,
const size_t& remaining_characters)
420 , m_size(initial_size)
421 , m_keys((key_t*) malloc(sizeof(key_t) * initial_size))
422 , m_values((value_t*) malloc(sizeof(value_t) * initial_size))
424 , m_remaining_characters(remaining_characters)
426 for(
size_t i = 0; i < m_size; ++i) m_values[i] = undef_id;
427 m_sizeman.resize(m_size);
432 if(m_keys !=
nullptr) { free(m_keys); }
433 if(m_values !=
nullptr) { free(m_values); }
435 m_keys = std::move(o.m_keys);
436 m_values = std::move(o.m_values);
437 m_size = std::move(o.m_size);
438 m_sizeman.resize(m_size);
439 m_entries = std::move(o.m_entries);
441 o.m_values =
nullptr;
443 m_collisions = ( o.m_collisions);
444 m_old_size = ( o.m_old_size);
445 m_resizes = ( o.m_resizes);
446 m_specialresizes = ( m_specialresizes);
454 if(m_keys !=
nullptr) { free(m_keys); }
455 if(m_values !=
nullptr) { free(m_values); }
463 const auto size = m_sizeman.get_min_size(hint);
465 const size_t oldsize = m_size;
467 m_sizeman.resize(m_size);
468 key_t* keys = (key_t*) malloc(
sizeof(key_t) * m_size);
469 value_t* values = (value_t*) malloc(
sizeof(value_t) * m_size);
470 for(
size_t i = 0; i < m_size; ++i) values[i] = undef_id;
475 for(
len_t i = 0; i < oldsize; ++i) {
476 if(values[i] == empty_val)
continue;
477 auto ret = insert(std::make_pair(std::move(keys[i]),std::move(values[i])));
478 DCHECK_EQ(ret.second,
true);
485 m_sizeman.resize(m_size);
486 m_values = (value_t*) realloc(m_values,
sizeof(value_t) * m_size);
488 for(
size_t i = 0; i < m_size; ++i) m_values[i] = undef_id;
489 m_keys = (key_t*) realloc(m_keys,
sizeof(key_t) * m_size);
498 : m_table(table), m_offset(offset)
502 return m_table == o.m_table && m_offset = o.m_offset;
506 m_offset = o.m_offset;
518 std::pair<Iterator,bool>
insert(std::pair<key_t,value_t>&& value) {
522 DCHECK_NE(value.second, empty_val);
523 const size_t init_hashvalue = m_h(value.first);
524 size_t hash = m_sizeman.mod_tablesize(init_hashvalue, table_size(), init_hashvalue, i);
525 m_probe.init(value.first);
526 len_t tablepos = hash;
528 if(m_values[tablepos] == empty_val) {
529 m_keys[tablepos] = std::move(value.first);
530 m_values[tablepos] = std::move(value.second);
532 if(
tdc_unlikely(table_size()*max_load_factor() < m_entries)) {
533 auto toinsert = std::make_pair(m_keys[tablepos], m_values[tablepos]);
535 size_t expected_size =
536 std::is_same<SizeManager,SizeManagerDirect>::value ?
537 (m_entries + 3.0/2.0*lz78_expected_number_of_remaining_elements(entries(),m_n,m_remaining_characters))/0.95 :
538 (m_entries + lz78_expected_number_of_remaining_elements(entries(),m_n,m_remaining_characters))/0.95;
539 expected_size = std::max<size_t>(expected_size, table_size()*1.1);
540 if(expected_size < table_size()*2.0*0.95) {
541 max_load_factor(0.95f);
542 if(std::is_same<SizeManager,SizeManagerDirect>::value) {
543 reserve(expected_size);
545 reserve(expected_size);
550 reserve((table_size()-1)<<1);
552 auto ret = insert(std::move(toinsert));
553 DCHECK_EQ(ret.second,
false);
554 return std::make_pair(ret.first,
true);
556 return std::make_pair(Iterator(
this,tablepos),
true);
558 if(m_eq(m_keys[tablepos], value.first)) {
559 return std::make_pair(Iterator(
this,tablepos),
false);
561 #ifdef USE_KNUTH_ORDERED //we used the reverse ordered of knuth since we insert values in ascending order 562 if(m_keys[tablepos] > value.first) {
564 std::swap(value.second,m_values[tablepos]);
565 hash = m_sizeman.mod_tablesize(init_hashvalue, table_size(), init_hashvalue, i);
566 m_probe.init(value.first);
571 tablepos = m_sizeman.mod_tablesize(m_probe.template get<SizeManager>(i, tablepos, hash, table_size()), table_size(), init_hashvalue, i);
572 DCHECK_LT(i, table_size()) <<
"Hash table is full!";
575 m_old_size = table_size();
576 if(m_old_size != table_size()) {
577 DVLOG(1) <<
"HashMap size " << table_size() << std::endl;
Contains the text compression and encoding framework.
Iterator(HashMap *table, len_t offset)
static len_t get(const size_t &hash_value, const len_t &max_value)
const size_t & m_remaining_characters
static void init(const key_t &)
Env create_env(Meta &&meta, const std::string &options="")
Creates an environment.
bool operator==(const Iterator &o) const
len_t mod_tablesize(const size_t index, const len_t tablesize, const size_t, const size_t)
Compute index % tablesize.
SizeManagerNoob(Env &&env)
#define tdc_unlikely(x)
Provides a hint to the compiler that x is expected to resolve to false.
ZBackupRollingHash(Env &&env)
size_t operator()(size_t key) const
std::pair< Iterator, bool > insert(std::pair< key_t, value_t > &&value)
size_t operator()(size_t key) const
size_t operator()(size_t key)
WordpackRollingHash(Env &&env)
len_t mod_tablesize(const size_t, const len_t tablesize, const size_t key, const size_t probe)
Compute index % tablesize Since tablesize is a power of two, a bitwise-AND is equivalent and faster f...
SizeManagerPow2(Env &&env)
static len_t mod_tablesize(const size_t &index, const len_t &tablesize, const size_t &, const size_t &)
Compute index % tablesize Since tablesize is a power of two, a bitwise-AND is equivalent and faster...
uint64_t operator()(uint64_t x) const
static len_t get_min_size(const len_t &hint)
void swap(IntVector< T > &lhs, IntVector< T > &rhs)
Env & env()
Provides access to the environment that the algorithm works in.
void init(const key_t &key)
static len_t get_min_size(const len_t &hint)
The lowest possible size larger than hint.
Algorithm & operator=(Algorithm const &)=default
HashMap(Env &, const size_t n, const size_t &remaining_characters)
static len_t get_min_size(const len_t &hint)
The lowest possible size larger than hint.
static len_t mod_tablesize(const size_t &index, const len_t &tablesize, const size_t &, const size_t &)
fast_t< len_compact_t > len_t
Type to represent an length value.
uint64_t operator()(uint64_t x) const
#define IF_STATS(x)
x is compiled only when the STATS_DISABLED macro is undefined.
std::pair< key_t, value_t > store_t
SizeManagerDirect(Env &&env)
static void init(const key_t &)
static void init(const key_t &)
void incorporate(T &&o, len_t newsize)
Iterator & operator=(const Iterator &o)
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
IF_STATS(size_t m_collisions=0;size_t m_old_size=0;size_t m_resizes=0;size_t m_specialresizes=0;) public
SizeManagerPrime(Env &&env)
static size_t next_prime(size_t __n)
constexpr factorid_t undef_id
Id that can be used for a non-existing factor.
QuadraticProber(Env &&env)
float max_load_factor() const noexcept
Local environment for a compression/encoding/decompression call.
Interface for algorithms.
const value_t & value() const
#define IF_DEBUG(x)
x is compiled only in debug builds.
static len_t get_min_size(const len_t &hint)
The lowest possible size larger than hint.