Skip to content

Instantly share code, notes, and snippets.

@raulbajales
Created May 2, 2011 14:51
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save raulbajales/951716 to your computer and use it in GitHub Desktop.
Save raulbajales/951716 to your computer and use it in GitHub Desktop.
MD5 and Murmur hashing algorithms performance comparison -> Murmur WIN
package com.hashcomparisons;
import java.security.MessageDigest;
import java.util.Random;
import org.apache.commons.lang.RandomStringUtils;
public class CompareMD5AndMurmur {
public static void main(String[] args) {
String[] strings = generateRandomStrings(300, 50, 500);
measureMD5(strings);
measureMurmur(strings);
}
private static String[] generateRandomStrings(int count, int minLenght, int maxLenght) {
String[] retValue = new String[count];
Random rand = new Random();
for (int i = 0; i < count; i++) {
int size = rand.nextInt((maxLenght + 1) - minLenght) + minLenght;
retValue[i] = RandomStringUtils.randomAlphanumeric(size);
}
System.out.println("Generated " + count + " random strings with random lenght from " + minLenght + " to " + maxLenght);
return retValue;
}
private static void measureMD5(String[] strings) {
long time = System.currentTimeMillis();
try {
MessageDigest md = MessageDigest.getInstance("MD5");
for (int i = 0; i < strings.length; i++)
md.digest(strings[i].getBytes("UTF-8"));
} catch (Exception e) {
System.exit(1);
}
time = System.currentTimeMillis() - time;
System.out.println("MD5 took " + time + " millis");
}
private static void measureMurmur(String[] strings) {
long time = System.currentTimeMillis();
try {
MurmurHash hasher = new MurmurHash();
for (int i = 0; i < strings.length; i++) {
String string = strings[i];
hasher.hash(string.getBytes("UTF-8"), string.length(), 2);
}
} catch (Exception e) {
System.exit(1);
}
time = System.currentTimeMillis() - time;
System.out.println("Murmur took " + time + " millis");
}
}
// This is the implementation used by Cassandra, check here:
// http://www.docjar.com/html/api/org/apache/cassandra/utils/MurmurHash.java.html
class MurmurHash {
public int hash(byte[] data, int length, int seed) {
int m = 0x5bd1e995;
int r = 24;
int h = seed ^ length;
int len_4 = length >> 2;
for (int i = 0; i < len_4; i++) {
int i_4 = i << 2;
int k = data[i_4 + 3];
k = k << 8;
k = k | (data[i_4 + 2] & 0xff);
k = k << 8;
k = k | (data[i_4 + 1] & 0xff);
k = k << 8;
k = k | (data[i_4 + 0] & 0xff);
k *= m;
k ^= k >>> r;
k *= m;
h *= m;
h ^= k;
}
// avoid calculating modulo
int len_m = len_4 << 2;
int left = length - len_m;
if (left != 0) {
if (left >= 3) {
h ^= (int) data[length - 3] << 16;
}
if (left >= 2) {
h ^= (int) data[length - 2] << 8;
}
if (left >= 1) {
h ^= (int) data[length - 1];
}
h *= m;
}
h ^= h >>> 13;
h *= m;
h ^= h >>> 15;
return h;
}
}
@karl82
Copy link

karl82 commented Sep 7, 2023

This benchmark is flawed in many. Please use jmh for u-benchmarks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment