Skip to content

Instantly share code, notes, and snippets.

@AScriver
Last active November 16, 2022 17:39
Show Gist options
  • Save AScriver/fcee1aa1aad2d8c2ae681529032af79c to your computer and use it in GitHub Desktop.
Save AScriver/fcee1aa1aad2d8c2ae681529032af79c to your computer and use it in GitHub Desktop.
Finds degree of similarity between two strings, based on Dice's Coefficient, using SQL/T-SQL
-- Finds degree of similarity between two strings, based on Dice's Coefficient
DROP FUNCTION IF EXISTS dbo.CompareTwoStrings
GO
CREATE FUNCTION dbo.CompareTwoStrings (
@string1 nvarchar(4000)
, @string2 nvarchar(4000)
) RETURNS NUMERIC(10, 10)
AS BEGIN
DECLARE @i INT
DECLARE @bigram CHAR(2)
DECLARE @count INT
DECLARE @intersectionSize INT = 0
DECLARE @firstBigrams TABLE (
letter CHAR(2) COLLATE Latin1_General_CS_AS NOT NULL UNIQUE ,
cnt INT
)
SET @i = 0
WHILE @i <= 32 BEGIN
SET @string1 = REPLACE(@string1, CHAR(@i), '')
SET @string2 = REPLACE(@string2, CHAR(@i), '')
SET @i = @i + 1
END
IF (@string1 = @string2) RETURN 1
IF (LEN(@string1) < 3 OR LEN(@string2) < 3) RETURN 0
SET @i = 1
WHILE @i < LEN(@string1) BEGIN
SET @bigram = SUBSTRING(@string1, @i, @i + 2)
IF EXISTS(SELECT * FROM @firstBigrams WHERE letter = @bigram COLLATE Latin1_General_CS_AS) BEGIN
UPDATE @firstBigrams SET cnt = cnt + 1 WHERE letter = @bigram COLLATE Latin1_General_CS_AS
END ELSE BEGIN
INSERT INTO @firstBigrams (letter, cnt) VALUES (@bigram, 1)
END
SET @i = @i + 1
END
SET @i = 1
WHILE @i < LEN(@string2) BEGIN
SET @count = NULL
SET @bigram = SUBSTRING(@string2, @i, @i + 2)
SELECT @count = cnt FROM @firstBigrams WHERE letter = @bigram COLLATE Latin1_General_CS_AS
SET @count = ISNULL(@count, 0)
IF(@count > 0) BEGIN
UPDATE @firstBigrams SET cnt = @count - 1 WHERE letter = @bigram COLLATE Latin1_General_CS_AS
SET @intersectionSize = @intersectionSize + 1
END
SET @i = @i + 1
END
RETURN ((2.0 * @intersectionSize)/(LEN(@string1) + LEN(@string2) - 2))
END
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment