Created
July 6, 2011 07:00
-
-
Save rctay/1066703 to your computer and use it in GitHub Desktop.
[gsoc-diff] alternate hashing ("golden ratio")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c | |
index 789f4e5..28e74ae 100644 | |
--- a/xdiff/xhistogram.c | |
+++ b/xdiff/xhistogram.c | |
@@ -58,8 +58,38 @@ static int cmp_recs(xpparam_t const *xpp, xdfenv_t *env, | |
#define cmp(i, s1, l1, s2, l2) \ | |
(cmp_env(i->xpp, i->env, s1, l1, s2, l2)) | |
+/* | |
+ * From http://en.wikipedia.org/wiki/Binary_logarithm#Integer | |
+ */ | |
+static int int_log2(unsigned int n) | |
+{ | |
+ int pos; | |
+ | |
+ if (n == 0) | |
+ return -1; | |
+ | |
+ | |
+ pos = 0; | |
+ if (n >= (1 <<16)) { n >>= 16; pos += 16; } | |
+ if (n >= (1 << 8)) { n >>= 8; pos += 8; } | |
+ if (n >= (1 << 4)) { n >>= 4; pos += 4; } | |
+ if (n >= (1 << 2)) { n >>= 2; pos += 2; } | |
+ if (n >= (1 << 1)) { pos += 1; } | |
+ return pos; | |
+} | |
+ | |
+static int table_bits(unsigned int size) | |
+{ | |
+ int bits = int_log2(size); | |
+ if (bits == 0) | |
+ bits = 1; | |
+ if (1 << bits < size) | |
+ bits++; | |
+ return bits; | |
+} | |
+ | |
#define table_hash(index, side, line) \ | |
- XDL_HASHLONG((get_rec(index->env, side, line))->ha, index->table_bits) | |
+ (((unsigned int) ((get_rec(index->env, side, line))->ha * 0x9e370001UL)) >> (32-index->table_bits)) | |
static int scanA(struct histindex *index, int line1, int count1) | |
{ | |
@@ -268,7 +298,7 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, | |
index.next_ptrs = NULL; | |
index.cnts = NULL; | |
- index.table_bits = xdl_hashbits(count1); | |
+ index.table_bits = table_bits(count1); | |
sz = index.records_size = 1 << index.table_bits; | |
sz *= sizeof(struct record *); | |
if (!(index.records = (struct record **) xdl_malloc(sz))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c | |
index ea7b82d..bf1c2e5 100644 | |
--- a/xdiff/xhistogram.c | |
+++ b/xdiff/xhistogram.c | |
@@ -66,8 +66,38 @@ static int cmp_recs(xpparam_t const *xpp, xdfenv_t *env, | |
#define cmp(i, s1, l1, s2, l2) \ | |
(cmp_env(i->xpp, i->env, s1, l1, s2, l2)) | |
+/* | |
+ * From http://en.wikipedia.org/wiki/Binary_logarithm#Integer | |
+ */ | |
+static int int_log2(unsigned int n) | |
+{ | |
+ int pos; | |
+ | |
+ if (n == 0) | |
+ return -1; | |
+ | |
+ | |
+ pos = 0; | |
+ if (n >= (1 <<16)) { n >>= 16; pos += 16; } | |
+ if (n >= (1 << 8)) { n >>= 8; pos += 8; } | |
+ if (n >= (1 << 4)) { n >>= 4; pos += 4; } | |
+ if (n >= (1 << 2)) { n >>= 2; pos += 2; } | |
+ if (n >= (1 << 1)) { pos += 1; } | |
+ return pos; | |
+} | |
+ | |
+static int table_bits(unsigned int size) | |
+{ | |
+ int bits = int_log2(size); | |
+ if (bits == 0) | |
+ bits = 1; | |
+ if (1 << bits < size) | |
+ bits++; | |
+ return bits; | |
+} | |
+ | |
#define table_hash(index, side, line) \ | |
- XDL_HASHLONG((get_rec(index->env, side, line))->ha, index->table_bits) | |
+ (((unsigned int) ((get_rec(index->env, side, line))->ha * 0x9e370001UL)) >> (32-index->table_bits)) | |
static int scanA(struct histindex *index, int line1, int count1) | |
{ | |
@@ -271,7 +301,7 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, | |
index.next = NULL; | |
index.rec_idxs = NULL; | |
- index.table_bits = xdl_hashbits(count1); | |
+ index.table_bits = table_bits(count1); | |
sz = index.table_size = 1 << index.table_bits; | |
sz *= sizeof(int); | |
if (!(index.table = xdl_malloc(sz))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h | |
index 165a895..51c29f6 100644 | |
--- a/xdiff/xmacros.h | |
+++ b/xdiff/xmacros.h | |
@@ -31,9 +31,8 @@ | |
#define XDL_ABS(v) ((v) >= 0 ? (v): -(v)) | |
#define XDL_ISDIGIT(c) ((c) >= '0' && (c) <= '9') | |
#define XDL_ISSPACE(c) (isspace((unsigned char)(c))) | |
-#define XDL_ADDBITS(v,b) ((v) + ((v) >> (b))) | |
-#define XDL_MASKBITS(b) ((1UL << (b)) - 1) | |
-#define XDL_HASHLONG(v,b) (XDL_ADDBITS((unsigned long)(v), b) & XDL_MASKBITS(b)) | |
+#define XDL_GOLDEN_RATIO_PRIME 0x9e370001UL | |
+#define XDL_HASHLONG(v,b) (((unsigned int) (v * 0x9e370001UL)) >> (32-b)) | |
#define XDL_PTRFREE(p) do { if (p) { xdl_free(p); (p) = NULL; } } while (0) | |
#define XDL_LE32_PUT(p, v) \ | |
do { \ | |
diff --git a/xdiff/xutils.c b/xdiff/xutils.c | |
index ab65034..f995962 100644 | |
--- a/xdiff/xutils.c | |
+++ b/xdiff/xutils.c | |
@@ -310,11 +310,32 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) { | |
} | |
-unsigned int xdl_hashbits(unsigned int size) { | |
- unsigned int val = 1, bits = 0; | |
+/* | |
+ * From http://en.wikipedia.org/wiki/Binary_logarithm#Integer | |
+ */ | |
+static int int_log2(unsigned int n) | |
+{ | |
+ int pos = 0; | |
+ | |
+ if (n == 0) | |
+ return -1; | |
+ | |
+ if (n >= (1 <<16)) { n >>= 16; pos += 16; } | |
+ if (n >= (1 << 8)) { n >>= 8; pos += 8; } | |
+ if (n >= (1 << 4)) { n >>= 4; pos += 4; } | |
+ if (n >= (1 << 2)) { n >>= 2; pos += 2; } | |
+ if (n >= (1 << 1)) { pos += 1; } | |
+ return pos; | |
+} | |
- for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, bits++); | |
- return bits ? bits: 1; | |
+unsigned int xdl_hashbits(unsigned int size) | |
+{ | |
+ int bits = int_log2(size); | |
+ if (bits == -1) | |
+ bits = 1; | |
+ if (1 << bits < size) | |
+ bits++; | |
+ return bits; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment