Skip to content

Instantly share code, notes, and snippets.

@alphaqiu
Created December 14, 2023 08:16
Show Gist options
  • Save alphaqiu/c207a9c1dce421006f4a7a3db02866ad to your computer and use it in GitHub Desktop.
Save alphaqiu/c207a9c1dce421006f4a7a3db02866ad to your computer and use it in GitHub Desktop.
package org.demo.murmurhash3;
import com.google.common.hash.Hashing;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import java.nio.charset.StandardCharsets;
/**
 * Wraps the 32-bit MurmurHash3 implementation from Google Guava so that the value it yields
 * matches the one produced by the Go package {@code github.com/spaolacci/murmur3}.
 *
 * <p>In Go, {@code murmur3.Sum32()} returns a {@code uint32}: an unsigned 4-byte integer whose
 * largest value is 4294967295. Java has no unsigned 32-bit primitive, and Guava's
 * {@code Hashing.murmur3_32_fixed().hashBytes(bytes).asInt()} hands back the same 32 bits as a
 * signed {@code int} ({@code asLong()} cannot be used on a 32-bit hash code). The hash is
 * therefore widened to a {@code long} with the upper 32 bits cleared, which is the number Go
 * reports for the same input bytes.
 */
public class MurmurHash3 {

    /**
     * Computes the 32-bit MurmurHash3 (seed 0) of the UTF-8 encoding of {@code value} and returns
     * it as an unsigned quantity.
     *
     * @param value the text to hash; may be {@code null}
     * @return the hash in the range {@code 0..4294967295}; {@code 0} when {@code value} is
     *     {@code null} or empty
     */
    public static long sum32(String value) {
        // Only null/empty input short-circuits. Whitespace-only strings are real data and must be
        // hashed like any other bytes, otherwise they all collapse to 0 and no longer agree with
        // the Go implementation.
        if (StringUtils.isEmpty(value)) {
            return 0;
        }
        var bytes = value.getBytes(StandardCharsets.UTF_8);
        int hash = Hashing.murmur3_32_fixed().hashBytes(bytes).asInt();
        // Reinterpret the signed int as uint32. This equals reading asBytes() (little-endian)
        // back into a long, without the intermediate array shuffling.
        return Integer.toUnsignedLong(hash);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment