Created
July 14, 2012 21:37
-
-
Save fish2000/3113526 to your computer and use it in GitHub Desktop.
Hamming distance in SQL on binary string types (if it works, it will work on whatever data; I am 97% sure this is PostgreSQL [but not PL/pgSQL] — note: the functions used below, BIT_COUNT, CONV, HEX and UNHEX, are MySQL built-ins)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Example usage: fetch the 10 rows whose `hash` column is closest (by Hamming
-- distance) to a supplied SHA-256 digest.
--   * `table` is a placeholder name; it is a reserved word, so it is
--     backtick-quoted here. Substitute the real table name.
--   * Replace the text inside the quotes with the 64-character hex digest;
--     UNHEX() takes a hex string and yields the raw 32-byte value.
-- SELECT * is kept because the placeholder table's columns are unknown here.
SELECT *
FROM `table`
ORDER BY HAMMINGDISTANCE(hash, UNHEX('<insert supplied sha256 hash here>')) ASC
LIMIT 10;
-- Hamming distance between two values that are each supplied as four BIGINT
-- words: (A0, A1, A2, A3) versus (B0, B1, B2, B3).
-- Each word pair is XOR-ed so that differing bits become 1, BIT_COUNT() counts
-- those bits, and the four per-word counts are summed.
-- Returns the total number of differing bits as INT.
CREATE FUNCTION HAMMINGDISTANCE(
    A0 BIGINT,
    A1 BIGINT,
    A2 BIGINT,
    A3 BIGINT,
    B0 BIGINT,
    B1 BIGINT,
    B2 BIGINT,
    B3 BIGINT
)
RETURNS INT
DETERMINISTIC
RETURN
      BIT_COUNT(A0 ^ B0)   -- word 0
    + BIT_COUNT(A1 ^ B1)   -- word 1
    + BIT_COUNT(A2 ^ B2)   -- word 2
    + BIT_COUNT(A3 ^ B3);  -- word 3
-- Hamming distance between two 32-byte binary strings (for example, raw
-- SHA-256 digests): the number of bit positions in which A and B differ.
--
-- The 32 bytes are processed as four 8-byte chunks. Each chunk is turned into
-- a 64-bit integer via HEX() -> CONV(.., 16, 10), the two integers are XOR-ed,
-- and BIT_COUNT() counts the differing bits. The four counts are summed.
--
-- CONV() returns a string, so every operand is explicitly CAST to UNSIGNED
-- before the XOR. This avoids relying on implicit string-to-integer coercion.
-- NOTE(review): that implicit coercion appears to saturate at the signed
-- BIGINT maximum for chunks whose top bit is set, which would under-count
-- differences; confirm on the target MySQL version.
--
-- NOTE(review): MySQL does not overload stored functions by argument list, so
-- this definition cannot be created alongside another function named
-- HAMMINGDISTANCE in the same schema; create only one variant, or rename one.
CREATE FUNCTION HAMMINGDISTANCE(A BINARY(32), B BINARY(32))
RETURNS INT DETERMINISTIC
RETURN
    -- bytes 1-8
    BIT_COUNT(
        CAST(CONV(HEX(SUBSTRING(A, 1, 8)), 16, 10) AS UNSIGNED) ^
        CAST(CONV(HEX(SUBSTRING(B, 1, 8)), 16, 10) AS UNSIGNED)
    ) +
    -- bytes 9-16
    BIT_COUNT(
        CAST(CONV(HEX(SUBSTRING(A, 9, 8)), 16, 10) AS UNSIGNED) ^
        CAST(CONV(HEX(SUBSTRING(B, 9, 8)), 16, 10) AS UNSIGNED)
    ) +
    -- bytes 17-24
    BIT_COUNT(
        CAST(CONV(HEX(SUBSTRING(A, 17, 8)), 16, 10) AS UNSIGNED) ^
        CAST(CONV(HEX(SUBSTRING(B, 17, 8)), 16, 10) AS UNSIGNED)
    ) +
    -- bytes 25-32
    BIT_COUNT(
        CAST(CONV(HEX(SUBSTRING(A, 25, 8)), 16, 10) AS UNSIGNED) ^
        CAST(CONV(HEX(SUBSTRING(B, 25, 8)), 16, 10) AS UNSIGNED)
    );
-- Self-contained demonstration of a byte-by-byte Hamming distance that needs
-- no stored function: one 3-byte hash is compared against a 3-byte value held
-- in the user variable @supplied.
DROP TABLE IF EXISTS BinaryTest;
CREATE TABLE BinaryTest (hash BINARY(3));
INSERT INTO BinaryTest (hash) VALUES (0xAAAAAA);
SET @supplied = CAST(0x888888 AS BINARY);

-- For each byte position: take the byte's numeric value with ASCII(), XOR the
-- two bytes, and render the result as binary digits with BIN(). After
-- concatenating the three digit strings, every '0' is removed, so the length
-- of what remains is the number of 1-bits, i.e. the number of differing bits.
SELECT LENGTH(REPLACE(CONCAT(
    BIN(ASCII(SUBSTR(hash, 1, 1)) ^ ASCII(SUBSTR(@supplied, 1, 1))),
    BIN(ASCII(SUBSTR(hash, 2, 1)) ^ ASCII(SUBSTR(@supplied, 2, 1))),
    BIN(ASCII(SUBSTR(hash, 3, 1)) ^ ASCII(SUBSTR(@supplied, 3, 1)))
), '0', '')) AS hamming_distance
FROM BinaryTest;
-- 0xAAAAAA ^ 0x888888 = 0x222222 = 0b1000100010001000100010,
-- which has six 1-bits, so the query above returns 6.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment