Created
March 17, 2012 10:39
-
-
Save tamtam180/2057441 to your computer and use it in GitHub Desktop.
MurmurHash3のJava実装。32ビットと128ビットを求めるやつ。x86とx64で結果は異なる
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class EncodeUtils { | |
public static int toIntBE(byte[] b, int i) { | |
return ((b[i+0] << 24) + (b[i+1] << 16) + (b[i+2] << 8) + (b[i+3] << 0)); | |
} | |
public static int toIntLE(byte[] b, int i) { | |
return ((b[i+3] << 24) + (b[i+2] << 16) + (b[i+1] << 8) + (b[i+0] << 0)); | |
} | |
public static long toLongBE(byte[] b, int offset) { | |
return (((long)b[offset+0] << 56) + | |
((long)(b[offset+1] & 255) << 48) + | |
((long)(b[offset+2] & 255) << 40) + | |
((long)(b[offset+3] & 255) << 32) + | |
((long)(b[offset+4] & 255) << 24) + | |
((b[offset+5] & 255) << 16) + | |
((b[offset+6] & 255) << 8) + | |
((b[offset+7] & 255) << 0)); | |
} | |
public static long toLongLE(byte[] b, int offset) { | |
return (((long)b[offset+7] << 56) + | |
((long)(b[offset+6] & 255) << 48) + | |
((long)(b[offset+5] & 255) << 40) + | |
((long)(b[offset+4] & 255) << 32) + | |
((long)(b[offset+3] & 255) << 24) + | |
((b[offset+2] & 255) << 16) + | |
((b[offset+1] & 255) << 8) + | |
((b[offset+0] & 255) << 0)); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class MurmurHash3 { | |
private static int fmix(int h) { | |
h ^= h >>> 16; | |
h *= 0x85ebca6b; | |
h ^= h >>> 13; | |
h *= 0xc2b2ae35; | |
h ^= h >>> 16; | |
return h; | |
} | |
private static long fmix(long k) { | |
k ^= k >>> 33; | |
k *= 0xff51afd7ed558ccdL; | |
k ^= k >>> 33; | |
k *= 0xc4ceb9fe1a85ec53L; | |
k ^= k >>> 33; | |
return k; | |
} | |
public static int digest32_x86(byte[] data, int seed, boolean bigendian) { | |
final int len = data.length; | |
final int block_remain = len % 4; | |
final int block_size = len - block_remain; | |
int h1 = seed; | |
int c1 = 0xcc9e2d51; | |
int c2 = 0x1b873593; | |
int i = 0; | |
for (i = 0; i < block_size; i += 4) { | |
int k1 = bigendian ? EncodeUtils.toIntBE(data, i) : EncodeUtils.toIntLE(data, i); | |
k1 *= c1; | |
k1 = Integer.rotateLeft(k1, 15); | |
k1 *= c2; | |
h1 ^= k1; | |
h1 = Integer.rotateLeft(h1, 13); | |
h1 = h1 * 5 + 0xe6546b64; | |
} | |
int k1 = 0; | |
switch (block_remain) { | |
case 3: k1 ^= data[i+2] << 16; | |
case 2: k1 ^= data[i+1] << 8; | |
case 1: k1 ^= data[i+0]; | |
k1 *= c1; | |
k1 = Integer.rotateLeft(k1, 15); | |
k1 *= c2; | |
h1 ^=k1; | |
} | |
h1 ^= len; | |
h1 = fmix(h1); | |
return h1; | |
} | |
private static int updateK(int k, int cx1, int rnum, int cx2) { | |
k *= cx1; | |
k = Integer.rotateLeft(k, rnum); | |
k *= cx2; | |
return k; | |
} | |
private static int updateH(int h, int kx, int rnum, int hx, int cc) { | |
h ^= kx; | |
h = Integer.rotateLeft(h, rnum); | |
h += hx; | |
h = h * 5 + cc; | |
return h; | |
} | |
private static long updateK(long k, long cx1, int rnum, long cx2) { | |
k *= cx1; | |
k = Long.rotateLeft(k, rnum); | |
k *= cx2; | |
return k; | |
} | |
private static long updateH(long h, long kx, int rnum, long hx, long cc) { | |
h ^= kx; | |
h = Long.rotateLeft(h, rnum); | |
h += hx; | |
h = h * 5 + cc; | |
return h; | |
} | |
public static int[] digest128_x86(byte[] data, int seed, boolean bigendian) { | |
final int c1 = 0x239b961b; | |
final int c2 = 0xab0e9789; | |
final int c3 = 0x38b34ae5; | |
final int c4 = 0xa1e38b93; | |
final int len = data.length; | |
final int block_remain = len % 16; | |
final int block_size = len - block_remain; | |
int h1 = seed; | |
int h2 = seed; | |
int h3 = seed; | |
int h4 = seed; | |
int i = 0; | |
for (i = 0; i < block_size; i += 16) { | |
int k1 = bigendian ? EncodeUtils.toIntBE(data, i+0) : EncodeUtils.toIntLE(data, i+0); | |
int k2 = bigendian ? EncodeUtils.toIntBE(data, i+4) : EncodeUtils.toIntLE(data, i+4); | |
int k3 = bigendian ? EncodeUtils.toIntBE(data, i+8) : EncodeUtils.toIntLE(data, i+8); | |
int k4 = bigendian ? EncodeUtils.toIntBE(data, i+12) : EncodeUtils.toIntLE(data, i+12); | |
k1 = updateK(k1, c1, 15, c2); | |
h1 = updateH(h1, k1, 19, h2, 0x561ccd1b); | |
k2 = updateK(k2, c2, 16, c3); | |
h2 = updateH(h2, k2, 17, h3, 0x0bcaa747); | |
k3 = updateK(k3, c3, 17, c4); | |
h3 = updateH(h3, k3, 15, h4, 0x96cd1c35); | |
k4 = updateK(k4, c4, 18, c1); | |
h4 = updateH(h4, k4, 13, h1, 0x32ac3b17); | |
} | |
int k1 = 0; | |
int k2 = 0; | |
int k3 = 0; | |
int k4 = 0; | |
switch (block_remain) { | |
case 15: k4 ^= data[i+14] << 16; | |
case 14: k4 ^= data[i+13] << 8; | |
case 13: k4 ^= data[i+12] << 0; | |
k4 = updateK(k4, c4, 18, c1); | |
h4 ^= k4; | |
case 12: k3 ^= data[i+11] << 24; | |
case 11: k3 ^= data[i+10] << 16; | |
case 10: k3 ^= data[i+9] << 8; | |
case 9: k3 ^= data[i+8] << 0; | |
k3 = updateK(k3, c3, 17, c4); | |
h3 ^= k3; | |
case 8: k2 ^= data[i+7] << 24; | |
case 7: k2 ^= data[i+6] << 16; | |
case 6: k2 ^= data[i+5] << 8; | |
case 5: k2 ^= data[i+4] << 0; | |
k2 = updateK(k2, c2, 16, c3); | |
h2 ^= k2; | |
case 4: k1 ^= data[i+3] << 24; | |
case 3: k1 ^= data[i+2] << 16; | |
case 2: k1 ^= data[i+1] << 8; | |
case 1: k1 ^= data[i+0] << 0; | |
k1 = updateK(k1, c1, 15, c2); | |
h1 ^= k1; | |
} | |
// finalization | |
h1 ^= len; | |
h2 ^= len; | |
h3 ^= len; | |
h4 ^= len; | |
h1 += h2; h1 += h3; h1 += h4; | |
h2 += h1; h3 += h1; h4 += h1; | |
h1 = fmix(h1); | |
h2 = fmix(h2); | |
h3 = fmix(h3); | |
h4 = fmix(h4); | |
h1 += h2; h1 += h3; h1 += h4; | |
h2 += h1; h3 += h1; h4 += h1; | |
return new int[]{ h1, h2, h3, h4 }; | |
} | |
public static long[] digest128_x64(byte[] data, int seed, boolean bigendian) { | |
final long c1 = 0x87c37b91114253d5L; | |
final long c2 = 0x4cf5ad432745937fL; | |
final int len = data.length; | |
final int block_remain = len % 16; | |
final int block_size = len - block_remain; | |
long h1 = seed; | |
long h2 = seed; | |
// body | |
int i = 0; | |
for (i = 0; i < block_size; i += 16) { | |
long k1 = bigendian ? EncodeUtils.toLongBE(data, i + 0) : EncodeUtils.toLongLE(data, i + 0); | |
long k2 = bigendian ? EncodeUtils.toLongBE(data, i + 8) : EncodeUtils.toLongLE(data, i + 8); | |
k1 = updateK(k1, c1, 31, c2); | |
h1 = updateH(h1, k1, 27, h2, 0x52dce729L); | |
k2 = updateK(k2, c2, 33, c1); | |
h2 = updateH(h2, k2, 31, h1, 0x38495ab5L); | |
} | |
// tail | |
long k1 = 0; | |
long k2 = 0; | |
switch (block_remain) { | |
case 15: k2 ^= (long)data[i+14] << 48; | |
case 14: k2 ^= (long)data[i+13] << 40; | |
case 13: k2 ^= (long)data[i+12] << 32; | |
case 12: k2 ^= (long)data[i+11] << 24; | |
case 11: k2 ^= data[i+10] << 16; | |
case 10: k2 ^= data[i+9] << 8; | |
case 9: k2 ^= data[i+8] << 0; | |
k2 = updateK(k2, c2, 33, k2); | |
h2 ^= k2; | |
case 8: k1 ^= (long)data[i+7] << 56; | |
case 7: k1 ^= (long)data[i+6] << 48; | |
case 6: k1 ^= (long)data[i+5] << 40; | |
case 5: k1 ^= (long)data[i+4] << 32; | |
case 4: k1 ^= (long)data[i+3] << 24; | |
case 3: k1 ^= data[i+2] << 16; | |
case 2: k1 ^= data[i+1] << 8; | |
case 1: k1 ^= data[i+0] << 0; | |
k1 = updateK(k1, c1, 31, c2); | |
h1 ^= k1; | |
} | |
// finalization | |
h1 ^= len; | |
h2 ^= len; | |
h1 += h2; | |
h2 += h1; | |
h1 = fmix(h1); | |
h2 = fmix(h2); | |
h1 += h2; | |
h2 += h1; | |
return new long[]{ h1, h2 }; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment