Skip to content

Instantly share code, notes, and snippets.

@aliles
Created October 22, 2012 09:28
Show Gist options
  • Save aliles/3930614 to your computer and use it in GitHub Desktop.
Save aliles/3930614 to your computer and use it in GitHub Desktop.
Python dict hash randomisation individualised per object
diff -r 8fb438e7f738 Objects/dictobject.c
--- a/Objects/dictobject.c Sun Oct 21 18:31:25 2012 -0700
+++ b/Objects/dictobject.c Mon Oct 22 20:27:51 2012 +1100
@@ -235,6 +235,31 @@
static int dictresize(PyDictObject *mp, Py_ssize_t minused);
+/*
+Hash randomisation by repated folding controlled by a seed value. The seed
+value used is the address of the dict object. This breaks copy but avoids the
+need to persist the seed value with the object.
+ */
+static inline size_t fold_hash(size_t hash, size_t seed, size_t mask)
+{
+ asm("mov %2,%%ecx;"
+ "rol %%cl,%0;"
+ "bswap %0;"
+ "ror $8,%%ecx;"
+ "rol %%cl,%0;"
+ "bswap %0;"
+ "ror $8,%%ecx;"
+ "rol %%cl,%0;"
+ "bswap %0;"
+ "ror $8,%%ecx;"
+ "rol %%cl,%0;"
+ "bswap %0;"
+ : "=g" (hash)
+ : "0" (hash), "g" (seed)
+ : "ecx");
+ return hash & mask;
+}
+
/* Dictionary reuse scheme to save calls to malloc, free, and memset */
#ifndef PyDict_MAXFREELIST
#define PyDict_MAXFREELIST 80
@@ -471,7 +496,7 @@
top:
mask = DK_MASK(mp->ma_keys);
ep0 = &mp->ma_keys->dk_entries[0];
- i = (size_t)hash & mask;
+ i = fold_hash(hash, (size_t) mp, mask);
ep = &ep0[i];
if (ep->me_key == NULL || ep->me_key == key) {
*value_addr = &ep->me_value;
@@ -566,7 +591,7 @@
mp->ma_keys->dk_lookup = lookdict;
return lookdict(mp, key, hash, value_addr);
}
- i = (size_t)hash & mask;
+ i = fold_hash(hash, (size_t) mp, mask);;
ep = &ep0[i];
if (ep->me_key == NULL || ep->me_key == key) {
*value_addr = &ep->me_value;
@@ -630,7 +655,7 @@
mp->ma_keys->dk_lookup = lookdict;
return lookdict(mp, key, hash, value_addr);
}
- i = (size_t)hash & mask;
+ i = fold_hash(hash, (size_t) mp, mask);;
ep = &ep0[i];
assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == NULL || ep->me_key == key ||
@@ -674,7 +699,7 @@
*value_addr = &mp->ma_values[i];
return ep;
}
- i = (size_t)hash & mask;
+ i = fold_hash(hash, (size_t) mp, mask);
ep = &ep0[i];
assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == NULL || ep->me_key == key ||
@@ -773,7 +798,7 @@
assert(key != NULL);
if (!PyUnicode_CheckExact(key))
mp->ma_keys->dk_lookup = lookdict;
- i = hash & mask;
+ i = fold_hash(hash, (size_t) mp, mask);
ep = &ep0[i];
for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
i = (i << 2) + i + perturb + 1;
@@ -884,7 +909,7 @@
assert(key != NULL);
assert(key != dummy);
assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
- i = hash & mask;
+ i = fold_hash(hash, (size_t) mp, mask);
ep = &ep0[i];
for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
i = (i << 2) + i + perturb + 1;
@aliles
Copy link
Author

aliles commented Oct 22, 2012

@fijall has pointed out, this is an even worse idea as objects are likely to end up in the same memory position in CPython.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment