Created
December 9, 2015 21:27
-
-
Save petrovitch/eee2fb5d581c5113aa35 to your computer and use it in GitHub Desktop.
Levenshtein Distance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* levenshtein.php | |
* | |
* CID 13a745cd-f086-4e7b-9594-79746e3d02d0 | |
* | |
* Levenshtein Distance is a measure of the least number of transformations that need | |
* to be applied to a string to convert it into another string. We can use this to measure | |
* the difference between two strings. This allows us to test responses provided by users to | |
* determine how different they are to each other. In the results presented a high Levenshtein | |
* distance is better. Levenshtein Distance is highly correlated with Length. | |
* | |
* Compute edit distance (Levenshtein) to suggest best replacement for misspelled word. | |
* Note: The levenshtein() function returns -1 if one of the strings exceeds 255 characters. | |
* The levenshtein() function is not case-sensitive. The levenshtein() function is faster | |
* than the similar_text() function. However, similar_text() will give you a more accurate | |
* result with less modifications needed. | |
* | |
* The levenshtein() function returns the Levenshtein distance between two strings. | |
* The Levenshtein distance is the number of characters you have to replace, insert | |
* or delete to transform string1 into string2. By default, PHP gives each operation | |
* (replace, insert, and delete) equal weight. However, you can define the cost of | |
* each operation by setting the optional insert, replace, and delete parameters. | |
* | |
* The function similar_text — Calculate the similarity between two strings, and | |
* returns the number of matching chars in both strings. | |
* | |
* @param $input #word to be checked for misspelling | |
* @ret true #if word is found in dictionary, returns empty value | |
* @ret false #if word is not found in dictionary, return an array of closest words | |
* | |
* @example | |
* $levenshtein = new Levenshtein(2); | |
* echo "<pre>"; | |
* $str = 'zugzwang perl fiancheto perl'; | |
* print_r($str); | |
* $words = preg_split("/\s/", $str); | |
* $words = array_unique($words); | |
* sort($words); | |
* print_r($words); | |
* foreach ($words as $word) | |
* { | |
* print_r($word); | |
* $retVal = $levenshtein->check_spelling($word); | |
* print_r($retVal); | |
* } | |
* echo "</pre>"; | |
*/ | |
class Levenshtein | |
{ | |
private $n; // Show n nearest words | |
public function __construct($n) | |
{ | |
$this->n = $n; // Show n nearest words | |
} | |
public function check_spelling($input) | |
{ | |
$input = strtolower($input); | |
$input = trim($input); | |
$db = mysql_connect('10.25.34.253', 'root', 'portal01'); | |
mysql_select_db('claims'); | |
$query = "SELECT * FROM claims.dictionary ORDER BY word"; | |
$result = mysql_query($query, $db); | |
for ($i = 0; $i < mysql_num_rows($result); $i++) | |
{ | |
$row = mysql_fetch_array($result); | |
$words[] = $row["word"]; | |
} | |
mysql_close($db); | |
/** | |
* No shortest distance found, yet | |
*/ | |
$shortest = -1; | |
/** | |
* Loop through words in dictionary | |
*/ | |
foreach ($words as $word) | |
{ | |
/** | |
* Calculate the edit distance between the input word, and the current word | |
*/ | |
$lev = levenshtein($input, $word); | |
/** | |
* Check for an exact match | |
*/ | |
if ($lev == 0) | |
{ | |
$closest = $word; | |
$shortest = 0; | |
break; | |
} | |
/** | |
* If this distance is less than the next found shortest distance, | |
* OR if a next shortest word has not yet been found | |
*/ | |
if ($lev <= $shortest || $shortest < 0) | |
{ | |
/** | |
* Set the closest match, and shortest distance | |
*/ | |
$shortest = $lev; | |
$arr[] = $word; | |
} | |
} | |
/** | |
* Show n words with nearest distance. | |
*/ | |
$closest = array_slice($arr, sizeof($arr) - $this->n, sizeof($arr)); | |
if ($shortest == 0) | |
{ | |
return; | |
} | |
else | |
{ | |
return $closest; | |
} | |
} | |
} // Class | |
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment