tudocomp
– The TU Dortmund Compression Framework
rabinkarphash.h
Go to the documentation of this file.
1 #ifndef KARPRABINHASH
2 #define KARPRABINHASH
3 
4 
5 #include "characterhash.h"
6 #include <tudocomp/Algorithm.hpp>
7 
8 namespace tdc {
9 
28 class KarpRabinHash : public Algorithm {
29 public:
30  typedef uint64_t hashvaluetype;
31  typedef unsigned char chartype;
32  typedef hashvaluetype key_type;
33  inline static Meta meta() {
34  Meta m("hash_roll", "rk", "Karp-Rabin Rolling Hash");
35  return m;
36  }
37  void operator+=(char c) { eat(c); }
38  hashvaluetype operator()() const { return hashvalue; }
39  void clear() { hashvalue= 0;}
40 
41  // myn is the length of the sequences, e.g., 3 means that you want to hash sequences of 3 characters
42  // mywordsize is the number of bits you which to receive as hash values, e.g., 19 means that the hash values are 19-bit integers
44  wordsize(64),
45  hasher( maskfnc<hashvaluetype>(wordsize)),
46  HASHMASK(maskfnc<hashvaluetype>(wordsize)),BtoN(1) {
47  for (int i=0; i < n ; ++i) {
48  BtoN *= B;
49  BtoN &= HASHMASK;
50  }
51  }
52 
53  // this is a convenience function, use eat,update and .hashvalue to use as a rolling hash function
54  template<class container>
55  hashvaluetype hash(container & c) {
56  hashvaluetype answer(0);
57  for(uint k = 0; k<c.size(); ++k) {
58  hashvaluetype x(1);
59  for(uint j = 0; j< c.size()-1-k; ++j) {
60  x= (x * B) & HASHMASK;
61  }
62  x= (x * hasher.hashvalues[c[k]]) & HASHMASK;
63  answer=(answer+x) & HASHMASK;
64  }
65  return answer;
66  }
67 
68  // add inchar as an input, this is used typically only at the start
69  // the hash value is updated to that of a longer string (one where inchar was appended)
70  void eat(chartype inchar) {
71  hashvalue = (B*hashvalue + hasher.hashvalues[inchar] )& HASHMASK;
72  }
73 
74  // add inchar as an input and remove outchar, the hashvalue is updated
75  // this function can be used to update the hash value from the hash value of [outchar]ABC to the hash value of ABC[inchar]
76  void update(chartype outchar, chartype inchar) {
77  hashvalue = (B*hashvalue + hasher.hashvalues[inchar] - BtoN * hasher.hashvalues[outchar]) & HASHMASK;
78  }
79 
80 
81  hashvaluetype hashvalue;
82  int n;
83  const int wordsize;
84  CharacterHash<hashvaluetype,chartype> hasher;
85  const hashvaluetype HASHMASK;
86  hashvaluetype BtoN;
87  static const hashvaluetype B=37;
88 
89 
90 };
91 
92 
93 }//ns
94 #endif
95 
96 
static const hashvaluetype B
Definition: rabinkarphash.h:87
static Meta meta()
Definition: rabinkarphash.h:33
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
void eat(chartype inchar)
Definition: rabinkarphash.h:70
unsigned int uint
Definition: characterhash.h:6
hashvaluetype BtoN
Definition: rabinkarphash.h:86
CharacterHash< hashvaluetype, chartype > hasher
Definition: rabinkarphash.h:84
void update(chartype outchar, chartype inchar)
Definition: rabinkarphash.h:76
hashvaluetype operator()() const
Definition: rabinkarphash.h:38
This is a randomized version of the Karp-Rabin hash function.
Definition: rabinkarphash.h:28
hashvaluetype hashvalue
Definition: rabinkarphash.h:81
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
unsigned char chartype
Definition: rabinkarphash.h:31
const hashvaluetype HASHMASK
Definition: rabinkarphash.h:85
void operator+=(char c)
Definition: rabinkarphash.h:37
KarpRabinHash(Env &&env)
Definition: rabinkarphash.h:43
uint64_t hashvaluetype
Definition: rabinkarphash.h:30
hashvaluetype key_type
Definition: rabinkarphash.h:32
hashvaluetype hash(container &c)
Definition: rabinkarphash.h:55
Local environment for a compression/encoding/decompression call.
Interface for algorithms.
Definition: Algorithm.hpp:15