Skip to content

Instantly share code, notes, and snippets.

@sfaut
Created July 4, 2022 10:56
Show Gist options
  • Save sfaut/5cf74b3fe6f17d7dea72143b7c35fc0e to your computer and use it in GitHub Desktop.
Save sfaut/5cf74b3fe6f17d7dea72143b7c35fc0e to your computer and use it in GitHub Desktop.
SQL function calculating the Levenshtein distance between 2 strings
-- Author : sfaut <https://github.com/sfaut>
-- Creation date : 2022-07-04
-- Tested with MySQL 8.0.28 (@@sql_mode='ANSI,TRADITIONAL')
CREATE DEFINER="sfaut"@"%" FUNCTION "levenshtein_distance"(s1 TINYTEXT, s2 TINYTEXT)
RETURNS TINYINT UNSIGNED
DETERMINISTIC
COMMENT 'Calculate the Levenshtein distance'
BEGIN
DECLARE s1_length TINYINT UNSIGNED DEFAULT CHAR_LENGTH(s1);
DECLARE s2_length TINYINT UNSIGNED DEFAULT CHAR_LENGTH(s2);
DECLARE previous_distances, current_distances JSON DEFAULT JSON_ARRAY();
DECLARE p1, p2 TINYINT UNSIGNED;
DECLARE c1 VARCHAR(1); -- Not CHAR(1) because of auto-trim white space
DECLARE cost TINYINT UNSIGNED;
-- Remarkable cases handling
-- OK because all costs are at 1
IF s1 = s2 THEN
RETURN 0;
ELSEIF s1 = '' THEN
RETURN s2_length;
ELSEIF s2 = '' THEN
RETURN s1_length;
ELSEIF s1_length > s2_length AND POSITION(s2 IN s1) > 0 THEN
RETURN s1_length - s2_length;
ELSEIF s2_length > s1_length AND POSITION(s1 IN s2) > 0 THEN
RETURN s2_length - s1_length;
END IF;
SET p2 = 0;
WHILE p2 <= s2_length DO
SET previous_distances = JSON_ARRAY_APPEND(previous_distances, '$', p2);
SET p2 = p2 + 1;
END WHILE;
SET p1 = 1;
WHILE p1 <= s1_length DO
SET current_distances = JSON_ARRAY(p1);
SET c1 = SUBSTRING(s1 FROM p1 FOR 1);
SET p2 = 1;
WHILE p2 <= s2_length DO
SET cost = LEAST( -- Cheap cost retrieval
-- Removal cost
JSON_EXTRACT(previous_distances, CONCAT('$[', p2, ']')) + 1,
-- Insertion cost
JSON_EXTRACT(current_distances, CONCAT('$[', p2 - 1, ']')) + 1,
-- Substitution cost
JSON_EXTRACT(previous_distances, CONCAT('$[', p2 - 1, ']'))
+ IF(c1 = SUBSTRING(s2 FROM p2 FOR 1), 0, 1)
);
SET current_distances = JSON_ARRAY_APPEND(current_distances, '$', cost);
SET p2 = p2 + 1;
END WHILE;
SET previous_distances = current_distances;
SET p1 = p1 + 1;
END WHILE;
-- The result is the last cost
RETURN JSON_EXTRACT(current_distances, '$[last]');
END
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment