Created
November 7, 2016 17:02
-
-
Save sevaa/2d8c5d3170713acdb4984d6ae20811fd to your computer and use it in GitHub Desktop.
MurMurHash64B in Transact-SQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create function [dbo].[MurmurHash64B] | |
( | |
@s varbinary(max), | |
@seed bigint = 0 | |
) | |
returns bigint | |
as | |
begin | |
declare @len int = datalength(@s), @i int = 1 | |
declare @m bigint = 0x5bd1e995 | |
declare @h1 bigint = @len ^ (@seed & 0xffffffff) | |
declare @h2 bigint = 0 | |
declare @k1 bigint, @k2 bigint | |
while @len >= 8 | |
begin | |
set @k1 = cast(convert(binary(4), reverse(substring(@s, @i, 4))) as int) | |
set @k1 = (@k1 * @m) & 0xffffffff | |
set @k1 ^= @k1 / 0x1000000 | |
set @k1 = (@k1 * @m) & 0xffffffff | |
set @h1 = (@h1 * @m) & 0xffffffff | |
set @h1 ^= @k1; | |
set @k2 = cast(convert(binary(4), reverse(substring(@s, @i+4, 4))) as int) | |
set @k2 = (@k2 * @m) & 0xffffffff | |
set @k2 ^= @k2 / 0x1000000 | |
set @k2 = (@k2 * @m) & 0xffffffff | |
set @h2 = (@h2 * @m) & 0xffffffff | |
set @h2 ^= @k2 | |
set @len -= 8 | |
set @i += 8 | |
end | |
if @len >= 4 | |
begin | |
set @k1 = cast(convert(binary(4), reverse(substring(@s, @i, 4))) as int) | |
set @k1 = (@k1 * @m) & 0xffffffff | |
set @k1 ^= @k1 / 0x1000000 | |
set @k1 = (@k1 * @m) & 0xffffffff | |
set @h1 = (@h1 * @m) & 0xffffffff | |
set @h1 ^= @k1 | |
set @len -= 4 | |
set @i += 4 | |
end | |
if @len >= 1 | |
begin | |
if @len >= 2 | |
begin | |
if @len >= 3 | |
set @h2 ^= ascii(substring(@s, @i+2, 1)) * 0x10000 | |
set @h2 ^= ascii(substring(@s, @i+1, 1)) * 0x100 | |
end | |
set @h2 ^= ascii(substring(@s, @i, 1)) | |
set @h2 = (@h2 * @m) & 0xffffffff | |
end | |
set @h1 = ((@h1 ^ (@h2 / 0x40000)) * @m) & 0xffffffff | |
set @h2 = ((@h2 ^ (@h1 / 0x400000)) * @m) & 0xffffffff | |
set @h1 = ((@h1 ^ (@h2 / 0x20000)) * @m) & 0xffffffff | |
set @h2 = ((@h2 ^ (@h1 / 0x80000)) * @m) & 0xffffffff | |
declare @h bigint | |
if (@h1 & 0x80000000) <> 0 | |
begin | |
set @h1 &= 0x7fffffff | |
set @h = (@h1 * 0x100000000) | 0x8000000000000000 | |
end | |
else | |
set @h = @h1 * 0x100000000 | |
set @h |= @h2 | |
return @h | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
MSSQL is capable of storing 64-bit variables, but it doesn't support unsigned integer types, and treats arithmetic overflow as an exception. So the B flavor, which was originally geared towards 32-bit machines, is a better fit than A.
The official home of MurmurHash is at Github.
Update: there's also a MySQL implementation here.