Created
May 2, 2011 14:51
-
-
Save raulbajales/951716 to your computer and use it in GitHub Desktop.
MD5 and Murmur hashing algorithms performance comparison -> Murmur WIN
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.hashcomparisons; | |
import java.security.MessageDigest; | |
import java.util.Random; | |
import org.apache.commons.lang.RandomStringUtils; | |
public class CompareMD5AndMurmur { | |
public static void main(String[] args) { | |
String[] strings = generateRandomStrings(300, 50, 500); | |
measureMD5(strings); | |
measureMurmur(strings); | |
} | |
private static String[] generateRandomStrings(int count, int minLenght, int maxLenght) { | |
String[] retValue = new String[count]; | |
Random rand = new Random(); | |
for (int i = 0; i < count; i++) { | |
int size = rand.nextInt((maxLenght + 1) - minLenght) + minLenght; | |
retValue[i] = RandomStringUtils.randomAlphanumeric(size); | |
} | |
System.out.println("Generated " + count + " random strings with random lenght from " + minLenght + " to " + maxLenght); | |
return retValue; | |
} | |
private static void measureMD5(String[] strings) { | |
long time = System.currentTimeMillis(); | |
try { | |
MessageDigest md = MessageDigest.getInstance("MD5"); | |
for (int i = 0; i < strings.length; i++) | |
md.digest(strings[i].getBytes("UTF-8")); | |
} catch (Exception e) { | |
System.exit(1); | |
} | |
time = System.currentTimeMillis() - time; | |
System.out.println("MD5 took " + time + " millis"); | |
} | |
private static void measureMurmur(String[] strings) { | |
long time = System.currentTimeMillis(); | |
try { | |
MurmurHash hasher = new MurmurHash(); | |
for (int i = 0; i < strings.length; i++) { | |
String string = strings[i]; | |
hasher.hash(string.getBytes("UTF-8"), string.length(), 2); | |
} | |
} catch (Exception e) { | |
System.exit(1); | |
} | |
time = System.currentTimeMillis() - time; | |
System.out.println("Murmur took " + time + " millis"); | |
} | |
} | |
// This is the implementation used by Cassandra, check here: | |
// http://www.docjar.com/html/api/org/apache/cassandra/utils/MurmurHash.java.html | |
class MurmurHash { | |
public int hash(byte[] data, int length, int seed) { | |
int m = 0x5bd1e995; | |
int r = 24; | |
int h = seed ^ length; | |
int len_4 = length >> 2; | |
for (int i = 0; i < len_4; i++) { | |
int i_4 = i << 2; | |
int k = data[i_4 + 3]; | |
k = k << 8; | |
k = k | (data[i_4 + 2] & 0xff); | |
k = k << 8; | |
k = k | (data[i_4 + 1] & 0xff); | |
k = k << 8; | |
k = k | (data[i_4 + 0] & 0xff); | |
k *= m; | |
k ^= k >>> r; | |
k *= m; | |
h *= m; | |
h ^= k; | |
} | |
// avoid calculating modulo | |
int len_m = len_4 << 2; | |
int left = length - len_m; | |
if (left != 0) { | |
if (left >= 3) { | |
h ^= (int) data[length - 3] << 16; | |
} | |
if (left >= 2) { | |
h ^= (int) data[length - 2] << 8; | |
} | |
if (left >= 1) { | |
h ^= (int) data[length - 1]; | |
} | |
h *= m; | |
} | |
h ^= h >>> 13; | |
h *= m; | |
h ^= h >>> 15; | |
return h; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This benchmark is flawed in many. Please use jmh for u-benchmarks