Created
December 14, 2023 08:16
-
-
Save alphaqiu/c207a9c1dce421006f4a7a3db02866ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.demo.murmurhash3; | |
import com.google.common.hash.Hashing; | |
import org.apache.commons.lang3.ArrayUtils; | |
import org.apache.commons.lang3.StringUtils; | |
import java.nio.charset.StandardCharsets; | |
/** | |
* 包装Google Guava包内实现的MurmurHash3算法,使实际计算得到的值与Golang语言实现的MurmurHash3算法得到的值保持一致。 | |
* 在Golang的包 github.com/spaolacci/murmur3 中,计算得到的值 通过调用方法 murmur3.Sum32() 得到,该返回值类型为uint32 | |
* 在Golang中,uint32长度为8个字节,最大支持数值 4294967295。 | |
* 在Java Google Guava包Hashing.murmur3_32_fixed().hashBytes(bytes).asInt() 得到4个字节长度的整型,在求32位版本时无法通过asLong() | |
* 得到一个长整型。因此,需要通过Hashing.murmur3_32_fixed().hashBytes(bytes).asBytes() 来从输出的数组计算长整型。 | |
* Java中输出的结果是大端序,Golang中输出的数组结果是小端序,因此,在这里需要对产生的数组结果颠倒顺序。并同时转换byte -> word | |
* 这里无法使用commons-codec,虽然它和Google Guava库输出结果一致,但commons-codec不返回数组结果。 | |
*/ | |
public class MurmurHash3 { | |
public static long sum32(String value) { | |
if (StringUtils.isBlank(value)) { | |
return 0; | |
} | |
var bytes = value.getBytes(StandardCharsets.UTF_8); | |
var result = Hashing.murmur3_32_fixed().hashBytes(bytes).asBytes(); | |
int[] words = new int[result.length]; | |
for (int i = 0; i < result.length; i++) { | |
words[i] = result[i] & 0xFF; | |
} | |
reverse(words); | |
long sumValue = 0; | |
sumValue += (long)words[0] << 24; | |
sumValue += (long)words[1] << 16; | |
sumValue += (long)words[2] << 8; | |
sumValue += words[3]; | |
return sumValue; | |
} | |
private static void reverse(int[] array) { | |
if (ArrayUtils.isEmpty(array)) { | |
return; | |
} | |
int i = 0; | |
int j = array.length - 1; | |
int tmp; | |
while (j > i) { | |
tmp = array[j]; | |
array[j] = array[i]; | |
array[i] = tmp; | |
j--; | |
i++; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment