// compact_hashfn(x, w): an invertible xor-shift mix over the low w bits.
uint64_t j = (w / 2ull) + 1;
DCHECK_LT((w / 2ull), j);

// NB: two shifts, because a single shift by w == 64 bits would be undefined behavior.
uint64_t w_mask = (1ull << (w - 1ull) << 1ull) - 1ull;

return (x xor ((x << j) & w_mask)) & w_mask;

// ...

// is_pot(n): true iff n is a power of two.
return (n > 0ull && ((n & (n - 1ull)) == 0ull));
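// Worked example (illustrative, not from the original source): because
// j > w/2, the bits injected by `x << j` vanish when shifted by j again,
// so the mix f(x) = (x xor ((x << j) & w_mask)) & w_mask is its own
// inverse on w-bit values. For w = 16 (j = 9, w_mask = 0xFFFF):
//
//     f(0x1234) == 0x1234 ^ 0x6800 == 0x7A34
//     f(0x7A34) == 0x7A34 ^ 0x6800 == 0x1234
//
// This reversibility is what compose_key() relies on below when it calls
// compact_reverse_hashfn() to reconstruct a key from its table position
// and stored quotient.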
// SizeManager (fragment): tracks the element count and the table capacity,
// which is always a power of two and stored as its log2.
uint8_t m_capacity_log2;

// ...

inline size_t const& size() const {
    // ...
}

inline size_t capacity() {
    return 1ull << m_capacity_log2;
}

inline uint8_t capacity_log2() const {
    return m_capacity_log2;
}
// make_quot_ptr(ptr, quot_width): wrap a raw word pointer into a bit-packed
// pointer with quot_width bits per entry.
using namespace int_vector;

return IntPtrBase<QuotPtr>(ptr, 0, quot_width);

// ...

// rotate_end_to(base, pos, size): move the element at base[size] down to
// base[pos], shifting the elements in between up by one.
template<typename V, typename I>
inline void rotate_end_to(I base, size_t pos, size_t size) {
    // ...
    V tmp = std::move(*b);
    // ...
    *b = std::move(*(b - 1));
    // ...
}
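// Worked example (illustrative, not from the original source): with an
// array {10, 20, 30, 40, 99}, rotate_end_to(arr, 1, 4) moves the element
// at index 4 down to index 1 and shifts the ones in between up by one:
//
//     {10, 20, 30, 40, 99}  ->  {10, 99, 20, 30, 40}
//
// Bucket::rotate_end_to() below uses this to move a freshly appended
// element to its proper offset inside a bucket.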
template<typename val_t>
class BucketElem;

// A Bucket stores the (quotient, value) pairs of up to 64 table positions
// in a single allocation, together with a 64-bit occupancy bitvector.
template<typename val_t>
class Bucket {
    uint64_t m_bv = 0;
    void* m_ptr = nullptr;

    // Layout of the allocation: the value array, padding up to uint64_t
    // alignment, then the bit-packed quotient words.
    struct Layout {
        size_t val_bytes;
        size_t padding;
        size_t bv_bytes;

        inline size_t byte_size() {
            return val_bytes + padding + bv_bytes;
        }

        inline size_t bv_offset() {
            return val_bytes + padding;
        }
    };

    inline static Layout calc_sizes(size_t size, size_t quot_width) {
        size_t val_size = sizeof(val_t) * size;
        size_t bv_size = (size * quot_width + 63ull) / 64ull * sizeof(uint64_t);
        size_t padding = 0;

        DCHECK_EQ(val_size % alignof(val_t), 0);

        size_t rem = val_size % alignof(uint64_t);
        if (rem != 0) {
            padding += (alignof(uint64_t) - rem);
        }

        size_t padded_val_size = val_size + padding;

        DCHECK_EQ(padded_val_size % alignof(uint64_t), 0);

        // ...
    }
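    // Worked example (illustrative, not from the original source): for
    // val_t = uint32_t, size = 5 and quot_width = 10,
    //
    //     val_bytes = 4 * 5              = 20
    //     bv_bytes  = (5*10 + 63)/64 * 8 =  8   (one word of quotient bits)
    //     padding   = 8 - (20 % 8)       =  4
    //
    // so byte_size() == 32 and the quotient words start at bv_offset() == 24,
    // which is 8-byte aligned as the DCHECKs above require.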
    // Ptrs bundles the value-array pointer and the bit-packed quotient
    // pointer of a bucket (fields val_ptr and quot_ptr).
    // ...

    inline Ptrs ptrs(size_t quot_width) const {
        size_t size = this->size();

        auto layout = calc_sizes(size, quot_width);

        char* p = (char*) m_ptr;

        val_t* val_ptr = (val_t*)(p);
        uint64_t* bv_ptr = (uint64_t*)(p + layout.bv_offset());

        DCHECK_EQ(uint64_t(val_ptr) % alignof(val_t), 0);
        DCHECK_EQ(uint64_t(bv_ptr) % alignof(uint64_t), 0);

        // ...
    }
public:
    inline Bucket(): m_bv(0), m_ptr(nullptr) {}

    explicit inline Bucket(uint64_t bv, size_t quot_width): m_bv(bv) {
        size_t size = this->size();
        // ...

        auto layout = calc_sizes(size, quot_width);
        size_t malloc_size = layout.byte_size();
        // ...

        m_ptr = malloc(malloc_size);
    }
    inline size_t size() const {
        return __builtin_popcountll(m_bv);
    }

    inline uint64_t bv() const {
        return m_bv;
    }

    inline void destroy_vals(size_t quot_width) {
        size_t size = this->size();

        val_t* start = ptrs(quot_width).val_ptr;
        val_t* end = start + size;

        for(; start != end; start++) {
            // The values were placement-new'd, so destroy them explicitly.
            start->~val_t();
        }
    }
    // (destructor / cleanup, elided: frees the allocation if one exists)
    // ...
    if (m_ptr != nullptr) {
        // ...
    }
    // ...

    // Moves transfer ownership of the allocation and null out the source.
    inline Bucket(Bucket&& other) {
        // ...
        other.m_ptr = nullptr;
    }

    inline Bucket& operator=(Bucket&& other) {
        // ...
        other.m_ptr = nullptr;
        // ...
    }

    inline BucketElem<val_t> at(size_t pos, size_t quot_width) const {
        auto ps = ptrs(quot_width);
        // ...
    }

    inline void rotate_end_to(size_t pos, size_t quot_width) {
        size_t size = this->size();
        size_t last = size - 1;

        auto ps = ptrs(quot_width);
        tdc::rotate_end_to<val_t>(ps.val_ptr, pos, last);
        tdc::rotate_end_to<uint64_t>(ps.quot_ptr, pos, last);
    }

    inline bool is_empty() {
        return m_ptr == nullptr;
    }
};
template<typename val_t>
class BucketElem {
    // Wraps pointers to one value slot and its bit-packed quotient.
    // ...
public:
    inline BucketElem(): m_val_ptr(nullptr) {}

    inline uint64_t get_quotient() { return uint64_t(*m_quot_ptr); }

    inline void swap_quotient(uint64_t& other) {
        uint64_t tmp = uint64_t(*m_quot_ptr);
        *m_quot_ptr = other;
        other = tmp;
    }

    inline val_t const& val() const { return *m_val_ptr; }

    // ...

    inline bool ptr_eq(BucketElem const& other) { return m_val_ptr == other.m_val_ptr; }
};
template<typename val_t>
class compact_hash {
    using key_t = uint64_t;

    // Each Bucket covers 64 consecutive table positions.
    static constexpr size_t BVS_WIDTH_SHIFT = 6;
    static constexpr size_t BVS_WIDTH_MASK = 0b111111;
    static constexpr size_t DEFAULT_KEY_WIDTH = 16;
    static constexpr bool HIGH_BITS_RANDOM = false;

    // ... (size manager, key width and the per-position v/c bit pairs m_cv)
    std::vector<Bucket<val_t>> m_buckets;

    inline static constexpr size_t min_size(size_t size) {
        // Never use a table with fewer than two slots.
        return (size < 2) ? 2 : size;
    }

public:
    inline compact_hash(size_t size, size_t key_width = DEFAULT_KEY_WIDTH):
        m_sizing(min_size(size)),
        m_width(key_width)
    {
        // ...
        m_cv.resize(table_size());
        // One bucket per 64 table positions.
        m_buckets.reserve((table_size() + BVS_WIDTH_MASK) >> BVS_WIDTH_SHIFT);
        m_buckets.resize((table_size() + BVS_WIDTH_MASK) >> BVS_WIDTH_SHIFT);
    }
    inline compact_hash(compact_hash&& other):
        m_sizing(std::move(other.m_sizing)),
        m_width(std::move(other.m_width)),
        m_cv(std::move(other.m_cv)),
        m_buckets(std::move(other.m_buckets))
    {
    }

    inline compact_hash& operator=(compact_hash&& other) {
        m_sizing = std::move(other.m_sizing);
        m_width = std::move(other.m_width);
        m_cv = std::move(other.m_cv);
        m_buckets = std::move(other.m_buckets);

        return *this;
    }
    inline uint8_t table_size_log2() {
        return m_sizing.capacity_log2();
    }

    inline size_t table_size() {
        return m_sizing.capacity();
    }

    // Each table position owns two metadata bits in m_cv:
    //   v (bit 0): a group whose initial address is this position exists;
    //   c (bit 1): this position holds the first element of a group.
    inline bool get_v(size_t pos) {
        return (m_cv.at(pos) & 0b01) != 0;
    }

    inline bool get_c(size_t pos) {
        return (m_cv.at(pos) & 0b10) != 0;
    }

    inline void set_v(size_t pos, bool v) {
        auto x = m_cv.at(pos) & 0b10;
        m_cv.at(pos) = x | (0b01 * v);
    }

    inline void set_c(size_t pos, bool c) {
        auto x = m_cv.at(pos) & 0b01;
        m_cv.at(pos) = x | (0b10 * c);
    }
    inline void sparse_drop_bucket(size_t i) {
        size_t qw = quotient_width();
        m_buckets.at(i).destroy_vals(qw);
        // ...
    }

    inline void destroy_buckets() {
        size_t qw = quotient_width();
        for(size_t i = 0; i < m_buckets.size(); i++) {
            if (!m_buckets[i].is_empty()) {
                m_buckets[i].destroy_vals(qw);
            }
        }
    }

    inline void dcheck_key_width(uint64_t key) {
        uint64_t key_mask = (1ull << (m_width - 1ull) << 1ull) - 1ull;
        bool key_is_too_large = key & ~key_mask;
        DCHECK(!key_is_too_large) << "Attempt to insert key " << key
            << ", which requires more bits than the current set maximum of "
            << m_width << " Bits";
    }

    // The number of bits actually usable for a key is always at least
    // table_size_log2() + 1, even if the configured key width is smaller.
    inline uint8_t real_width() {
        return std::max(uint8_t(table_size_log2() + 1), m_width);
    }
    struct DecomposedKey {
        size_t initial_address;  // table position derived from the hash
        size_t stored_quotient;  // remaining hash bits, stored in the bucket
    };

    inline DecomposedKey decompose_key(uint64_t key) {
        dcheck_key_width(key);

        uint64_t hres = compact_hashfn(key, real_width());
        uint64_t shift = table_size_log2();

        if (HIGH_BITS_RANDOM) {
            shift = real_width() - shift;
            return DecomposedKey {
                hres >> shift,
                hres & ((1ull << shift) - 1ull),
            };
        } else {
            return DecomposedKey {
                hres & ((1ull << shift) - 1ull),
                hres >> shift,
            };
        }
    }

    inline uint64_t compose_key(uint64_t initial_address, uint64_t quotient) {
        uint64_t shift = table_size_log2();
        uint64_t harg;

        if (HIGH_BITS_RANDOM) {
            shift = real_width() - shift;
            harg = (initial_address << shift) | quotient;
        } else {
            harg = (quotient << shift) | initial_address;
        }

        uint64_t key = compact_reverse_hashfn(harg, real_width());
        dcheck_key_width(key);
        return key;
    }
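    // Worked example (illustrative, not from the original source), for the
    // default HIGH_BITS_RANDOM == false case: with table_size_log2() == 8
    // and real_width() == 16, a hashed key hres = 0x7A34 decomposes into
    //
    //     initial_address = hres & 0xFF = 0x34   (low bits: table position)
    //     stored_quotient = hres >> 8   = 0x7A   (high bits: kept in bucket)
    //
    // and compose_key() rebuilds harg = (0x7A << 8) | 0x34 = 0x7A34 before
    // inverting the hash. Only quotient_width() bits per key ever need to
    // be stored in the table.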
    template<typename handler_t>
    inline void shift_insert_handler(size_t from,
                                     size_t to,
                                     uint64_t key,
                                     handler_t&& handler) {
        // Shift the c-bits of the range (from, to] one position to the
        // right, then shift the stored elements themselves.
        for(size_t i = to; i != from;) {
            size_t next_i = mod_sub(i, size_t(1));

            set_c(i, get_c(next_i));

            i = next_i;
        }

        shift_insert_sparse_handler(from, to, key, std::move(handler));
        // ...
    }
    struct SearchedGroup {
        size_t group_start;        // first element of the key's group
        size_t group_end;          // exclusive end of the key's group
        size_t groups_terminator;  // first empty location after all groups
    };

    // Assumes that a group starting at the key's initial address exists.
    inline SearchedGroup search_existing_group(DecomposedKey const& key) {
        auto ret = SearchedGroup();

        // Walk forward from the initial address to the next empty location,
        // counting how many groups (v-bits) lie in between.
        size_t cursor = key.initial_address;
        size_t v_counter = 0;
        DCHECK_EQ(get_v(cursor), true);

        for(; sparse_exists(cursor); cursor = mod_add(cursor)) {
            v_counter += get_v(cursor);
        }
        DCHECK_GE(v_counter, 1);
        ret.groups_terminator = cursor;

        // Walk backwards again, consuming c-bits, to delimit the group that
        // belongs to the initial address.
        size_t c_counter = v_counter;
        for(; c_counter != 1; cursor = mod_sub(cursor)) {
            c_counter -= get_c(mod_sub(cursor));
        }

        ret.group_end = cursor;

        for(; c_counter != 0; cursor = mod_sub(cursor)) {
            c_counter -= get_c(mod_sub(cursor));
        }

        ret.group_start = cursor;

        return ret;
    }
    inline val_t* search(SearchedGroup const& res, uint64_t stored_quotient) {
        for(size_t i = res.group_start; i != res.group_end; i = mod_add(i)) {
            auto sparse_entry = sparse_get_at(i);

            if (sparse_entry.get_quotient() == stored_quotient) {
                return &sparse_entry.val();
            }
        }
        return nullptr;
    }

    inline val_t* search(uint64_t key) {
        auto dkey = decompose_key(key);
        if (get_v(dkey.initial_address)) {
            return search(search_existing_group(dkey), dkey.stored_quotient);
        }
        return nullptr;
    }
    inline void insert(uint64_t key, val_t&& value) {
        insert(key, std::move(value), m_width);
    }

    inline void insert(uint64_t key, val_t&& value, size_t key_width) {
        insert_handler(key, key_width, InsertHandler {
            std::move(value)
        });
    }

    inline val_t& operator[](uint64_t key) {
        return index(key, m_width);
    }

    inline val_t& index(uint64_t key, size_t key_width) {
        val_t* addr = nullptr;

        insert_handler(key, key_width, AddressDefaultHandler {
            &addr
        });
        // ...

        return *addr;
    }

    inline size_t size() const {
        return m_sizing.size();
    }
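    // Usage sketch (illustrative, not from the original source):
    //
    //     compact_hash<int> table(16, 20);  // 16 initial slots, 20-bit keys
    //     table.insert(0xDEAD, 42);         // move-insert a value
    //     table[0xBEEF] = 7;                // default-construct, then assign
    //
    // Keys wider than the configured key width trip the check in
    // dcheck_key_width(); passing a larger key_width to insert()/index()
    // raises the width and lets grow_if_needed() reallocate accordingly.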
    // Handler for inserting an element passed as an rvalue; if the key
    // already exists, the stored value is overwritten.
    class InsertHandler {
        // ...
    public:
        InsertHandler(val_t&& value): m_value(std::move(value)) {}

        inline auto on_new() {
            struct InsertHandlerOnNew {
                // ...
                inline val_t& get() { /* ... */ }
                inline void new_location(val_t& value) { /* ... */ }
            };
            return InsertHandlerOnNew { /* ... */ };
        }

        inline void on_existing(val_t& value) {
            m_value = std::move(value);
        }
    };

    // Handler for obtaining the address of an element, default-constructing
    // it first if no element exists for the key yet.
    class AddressDefaultHandler {
        val_t** m_address = nullptr;
    public:
        AddressDefaultHandler(val_t** address): m_address(address) {}

        inline auto on_new() {
            struct AddressDefaultHandlerOnNew {
                // ...
                inline val_t& get() { /* ... */ }
                inline void new_location(val_t& value) { /* ... */ }
            };
            return AddressDefaultHandlerOnNew { /* ... */ };
        }

        inline void on_existing(val_t& value) { /* ... */ }
    };
    // Insert the new element at the end of the group described by `res`.
    template<typename handler_t>
    inline void insert_after(SearchedGroup const& res,
                             DecomposedKey const& dkey,
                             handler_t&& handler) {
        if (sparse_is_empty(res.group_end)) {
            // The slot right after the group is free: store directly.
            sparse_set_at_empty_handler(res.group_end,
                dkey.stored_quotient,
                std::move(handler));
        } else {
            // Otherwise shift the following elements one slot to the right.
            shift_insert_handler(res.group_end,
                res.groups_terminator,
                dkey.stored_quotient,
                std::move(handler));
        }
    }

    template<typename handler_t>
    inline void insert_handler(uint64_t key, size_t key_width, handler_t&& handler) {
        grow_if_needed(key_width);
        auto const dkey = decompose_key(key);

        if (sparse_is_empty(dkey.initial_address)) {
            // Case 1: the initial address is free. Insert directly and mark
            // the position as the start of a new group.
            sparse_set_at_empty_handler(dkey.initial_address,
                dkey.stored_quotient,
                std::move(handler));

            set_v(dkey.initial_address, true);
            set_c(dkey.initial_address, true);
            // ...
        } else {
            // Case 2: the initial address is occupied. Does a group for
            // this key already exist?
            bool const group_exists = get_v(dkey.initial_address);

            if (group_exists) {
                auto const res = search_existing_group(dkey);

                // Check whether this exact element already exists.
                auto p = search(res, dkey.stored_quotient);
                if (p != nullptr) {
                    handler.on_existing(*p);
                } else {
                    insert_after(res, dkey, std::move(handler));
                    // ...
                }
            } else {
                // Case 3: no group for this key yet. Pretend it exists so
                // that search_existing_group() finds the insert position...
                set_v(dkey.initial_address, true);
                auto const res = search_existing_group(dkey);

                insert_after(res, dkey, std::move(handler));

                // ...then mark the inserted element as the group's start.
                set_c(res.group_end, true);
                // ...
            }
        }
    }
    // Modular increment / decrement of a table position. The table size is
    // a power of two, so the wrap-around reduces to a bitmask.
    template<typename int_t>
    inline int_t mod_add(int_t v, int_t add = 1) {
        size_t mask = table_size() - 1;
        return (v + add) & mask;
    }

    template<typename int_t>
    inline int_t mod_sub(int_t v, int_t sub = 1) {
        size_t mask = table_size() - 1;
        return (v - sub) & mask;
    }
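    // Worked example (illustrative, not from the original source): with
    // table_size() == 16 the mask is 15, so
    //
    //     mod_add(15) == (15 + 1) & 15 == 0    (wraps past the end)
    //     mod_sub(0)  == (0 - 1)  & 15 == 15   (wraps past the beginning)
    //
    // which is why the table size is kept at a power of two.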
    // Iterates over all occupied positions, yielding each position together
    // with the initial address of the group it belongs to.
    struct iter_all_t {
        // ...
        size_t original_start = 0;
        uint64_t initial_address = 0;
        enum {
            EMPTY_LOCATIONS,
            FULL_LOCATIONS
        } state = EMPTY_LOCATIONS;

        // Constructor (partially elided): scan forward to the first empty
        // location, remember it, then step one position past it so that
        // iteration starts at a group boundary.
        // ...
        if (m_self.sparse_is_empty(i)) {
            // ...
        }
        // ...
        i = m_self.mod_add(i);
        // ...

        inline bool next(uint64_t* out_initial_address, size_t* out_i) {
            while(true) {
                if (state == EMPTY_LOCATIONS) {
                    // Skip empty slots until the next occupied one.
                    for(;; i = m_self.mod_add(i)) {
                        if (m_self.sparse_exists(i)) {
                            // Start one position before the group's first element.
                            initial_address = m_self.mod_sub(i);
                            state = FULL_LOCATIONS;
                            break;
                        }
                        if (i == original_start) {
                            return false;
                        }
                    }
                } else {
                    if (m_self.sparse_is_empty(i)) {
                        state = EMPTY_LOCATIONS;
                        continue;
                    }
                    if (m_self.get_c(i)) {
                        // A new group starts here: advance the v-cursor to
                        // the group's initial address.
                        initial_address = m_self.mod_add(initial_address);
                        while(!m_self.get_v(initial_address)) {
                            initial_address = m_self.mod_add(initial_address);
                        }
                    }

                    *out_initial_address = initial_address;
                    *out_i = i;

                    i = m_self.mod_add(i);
                    return true;
                }
            }
        }
    };
    inline size_t quotient_width() {
        return real_width() - table_size_log2();
    }
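    // Worked example (illustrative, not from the original source): with a
    // 16-bit key width and a table of 256 slots (table_size_log2() == 8),
    // real_width() == max(9, 16) == 16, so quotient_width() == 8: the table
    // position itself encodes 8 bits of the hash, and only the remaining
    // 8 bits are stored per element inside the buckets.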
    // Grow the table if it would become more than half full, or if the key
    // width needs to increase.
    inline void grow_if_needed(size_t new_width) {
        auto needs_capacity_change = [&]() {
            return (m_sizing.capacity() / 2) <= (m_sizing.size() + 1);
        };

        auto needs_realloc = [&]() {
            return needs_capacity_change() || (new_width != m_width);
        };

        if (needs_realloc()) {
            size_t new_capacity = m_sizing.capacity();
            if (needs_capacity_change()) {
                new_capacity = m_sizing.capacity() * 2;
            }

            // Rehash every element into a fresh table, dropping the old
            // buckets as they are emptied out.
            auto new_table = compact_hash(new_capacity, new_width);
            // ...

            bool start_of_bucket = false;
            size_t bucket = 0;

            uint64_t initial_address;
            size_t i;
            auto iter = iter_all_t(*this);
            while(iter.next(&initial_address, &i)) {
                auto p = sparse_pos(i);

                // ...
                if (start_of_bucket) {
                    DCHECK_NE(bucket, p.bucket_pos);
                    sparse_drop_bucket(bucket);
                }

                start_of_bucket = true;
                bucket = p.bucket_pos;
                // ...

                auto kv = sparse_get_at(p);
                auto stored_quotient = kv.get_quotient();
                auto& val = kv.val();
                key_t key = compose_key(initial_address, stored_quotient);

                new_table.insert(key, std::move(val));
            }

            *this = std::move(new_table);
        }

        DCHECK(!needs_realloc());
    }
    // Debug helper: render the whole table state into a string.
    inline std::string debug_state() {
        std::stringstream ss;

        bool gap_active = false;
        // ...

        auto print_gap = [&](){
            // ...
            ss << " [" << gap_start << " to " << gap_end << " empty]\n";
            // ...
        };

        auto add_gap = [&](size_t i){
            // ...
        };

        std::vector<std::string> lines(table_size());

        uint64_t initial_address;
        size_t j;
        auto iter = iter_all_t(*this);
        while(iter.next(&initial_address, &j)) {
            std::stringstream ss2;

            auto kv = sparse_get_at(j);
            auto stored_quotient = kv.get_quotient();
            auto& val = kv.val();
            key_t key = compose_key(initial_address, stored_quotient);

            ss2 /* ... */
                << "\t: v = " << get_v(j)
                << ", c = " << get_c(j)
                << ", quot = " << stored_quotient
                << ", iadr = " << initial_address
                << "\t, key = " << key
                << "\t, value = " << val
                << "\t (" << &val << ")";

            lines.at(j) = ss2.str();
        }

        // ...

        for (size_t i = 0; i < table_size(); i++) {
            bool cv_exist = lines.at(i) != "";

            DCHECK_EQ(cv_exist, sparse_exists(i));
            // ...
        }

        // ...
    }
    // Debug helper: assert that `key` is present and maps to `val`.
    inline void debug_check_single(uint64_t key, val_t const& val) {
        auto ptr = search(key);
        ASSERT_NE(ptr, nullptr) << "key " << key << " not found!";
        if (ptr != nullptr) {
            ASSERT_EQ(*ptr, val) << "value is " << *ptr << " instead of " << val;
        }
    }
    template<typename handler_t>
    inline void sparse_set_at_empty_handler(size_t pos, key_t quot, handler_t&& handler) {
        auto data = sparse_pos(pos);
        DCHECK(!data.exists);

        // Locate the bucket and its current element count.
        auto& bucket = m_buckets[data.bucket_pos];
        size_t qw = quotient_width();
        const size_t size = bucket.size();

        // A new element will definitely be inserted.
        auto value_handler = handler.on_new();
        auto& val = value_handler.get();

        // Allocate a new bucket with room for one more element.
        auto new_bucket = Bucket<val_t>(bucket.bv() | data.b_mask, qw);

        // Move the existing elements over.
        for(size_t i = 0; i < size; i++) {
            auto new_elem = new_bucket.at(i, qw);
            auto elem = bucket.at(i, qw);

            new_elem.set_quotient(elem.get_quotient());
            new(&new_elem.val()) val_t(std::move(elem.val()));
        }

        // Construct the new element at the end, then rotate it to its
        // proper offset inside the bucket.
        auto new_elem = new_bucket.at(size, qw);
        new_elem.set_quotient(quot);
        new(&new_elem.val()) val_t(std::move(val));

        new_bucket.rotate_end_to(data.offset, qw);

        // Tell the handler where the new value now lives.
        auto new_loc = new_bucket.at(data.offset, qw);
        value_handler.new_location(new_loc.val());

        // Destroy the old bucket's values and replace it.
        bucket.destroy_vals(qw);
        bucket = std::move(new_bucket);
    }
    inline bool sparse_is_empty(size_t i) {
        return !sparse_exists(i);
    }

    inline bool sparse_exists(size_t pos) {
        return sparse_pos(pos).exists;
    }
    // Shift the occupied range [from, to) one slot to the right, insert
    // (val, quot) at `from`, and return the displaced content of position
    // to - 1 through the same val/quot references.
    inline auto sparse_shift(size_t from, size_t to, val_t& val, key_t& quot) {
        DCHECK_LT(from, to);

        // Pseudo-iterator over bucket elements. NB: it does not wrap around.
        struct iter {
            // ...
            size_t m_quotient_width;

            inline void set_bucket_elem_range(size_t end_offset) {
                size_t start_offset = 0;
                DCHECK_LE(start_offset, end_offset);

                m_b_start = m_bucket->at(start_offset, m_quotient_width);
                m_b_end = m_bucket->at(end_offset, m_quotient_width);
            }

            inline iter(compact_hash& table,
                        SparsePos const& pos) {
                m_quotient_width = table.quotient_width();
                m_bucket = &table.m_buckets.at(pos.bucket_pos);

                set_bucket_elem_range(pos.offset);
            }

            // ...

            inline void decrement() {
                if (!m_b_start.ptr_eq(m_b_end)) {
                    // ...
                } else {
                    // Step back into the previous non-empty bucket.
                    do {
                        // ...
                    } while(m_bucket->bv() == 0);
                    set_bucket_elem_range(m_bucket->size() - 1);
                }
            }

            inline bool operator!=(iter& other) {
                return !(m_b_end.ptr_eq(other.m_b_end));
            }
        };
        // Source and destination cursors, positioned at the right end of
        // the range.
        auto from_loc = sparse_pos(from);
        auto from_iter = iter(*this, from_loc);

        auto last = sparse_pos(to - 1);
        auto src = iter(*this, last);
        auto dst = iter(*this, sparse_pos(to));

        // Move the last element of the range into a temporary.
        auto tmp_p = sparse_get_at(last);
        val_t tmp_val = std::move(tmp_p.val());
        key_t tmp_quot = tmp_p.get_quotient();

        // Move the remaining elements one slot to the right, back to front.
        while(src != from_iter) {
            src.decrement();
            dst.decrement();

            auto& src_val = src.get().val();
            auto& dst_val = dst.get().val();

            auto src_quot = src.get().get_quotient();

            dst_val = std::move(src_val);
            dst.get().set_quotient(src_quot);
        }

        // Store the new element at `from` ...
        auto from_p = sparse_get_at(from_loc);
        from_p.val() = std::move(val);
        from_p.set_quotient(quot);

        // ... and hand the displaced last element back to the caller.
        val = std::move(tmp_val);
        quot = std::move(tmp_quot);

        // ...
    }
    template<typename handler_t>
    inline void shift_insert_sparse_handler(size_t from,
                                            size_t to,
                                            key_t quot,
                                            handler_t&& handler) {
        // ...
        auto value_handler = handler.on_new();
        auto& val = value_handler.get();

        // ...
        if (from > to) {
            // The range wraps around the end of the table, so shift twice.
            loc = sparse_shift(from, table_size(), val, quot);
            sparse_shift(0, to, val, quot);
        } else {
            loc = sparse_shift(from, to, val, quot);
        }

        // Insert the displaced last element into the free slot at `to`.
        auto insert = InsertHandler(std::move(val));
        sparse_set_at_empty_handler(to, quot, std::move(insert));

        // Finally tell the handler where the new value ended up.
        value_handler.new_location(sparse_get_at(loc).val());
    }
    // Decomposition of a flat table position into bucket coordinates.
    struct SparsePos {
        // ... (fields: bucket_pos, bit_pos, bv, b_mask, exists, offset)

        inline SparsePos() {}

        template<typename buckets_t>
        inline SparsePos(size_t pos, buckets_t& m_buckets):
            // which bucket the position falls into
            bucket_pos(pos >> BVS_WIDTH_SHIFT),
            // bit position inside that bucket's occupancy bitvector
            bit_pos(pos & BVS_WIDTH_MASK),
            // the bucket's occupancy bitvector
            bv(m_buckets[bucket_pos].bv()),
            // mask selecting this position's bit
            b_mask(1ull << bit_pos),
            // whether an element is stored at this position
            exists(bv & b_mask),
            // how many occupied slots precede it inside the bucket
            offset(__builtin_popcountll(bv & ((1ull << bit_pos) - 1)))
        {}
    };

    SparsePos sparse_pos(size_t pos) {
        return SparsePos { pos, m_buckets };
    }

    inline BucketElem<val_t> sparse_get_at(SparsePos pos) {
        // ...
        size_t qw = quotient_width();

        return m_buckets[pos.bucket_pos].at(pos.offset, qw);
    }

    inline BucketElem<val_t> sparse_get_at(size_t pos) {
        return sparse_get_at(sparse_pos(pos));
    }
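    // Worked example (illustrative, not from the original source): for the
    // flat position pos = 70 this yields bucket_pos = 70 >> 6 = 1 and
    // bit_pos = 70 & 63 = 6. If that bucket's occupancy bitvector is
    // bv = 0b1011011, then b_mask = 0b1000000, exists is true, and
    // offset = popcount(bv & 0b111111) = 4, i.e. the element is stored as
    // the fifth physical entry of bucket 1.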