Skip to content

Instantly share code, notes, and snippets.

@shankao
Created February 1, 2016 13:04
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shankao/b0d92e15c65852fda481 to your computer and use it in GitHub Desktop.
Save shankao/b0d92e15c65852fda481 to your computer and use it in GitHub Desktop.
Levenshtein distance in PHP with multibyte support
<?php
// Levenshtein distance with multibyte support
// Improved from https://gist.github.com/santhoshtr/1710925
function mb_levenshtein($str1, $str2, $encoding = 'UTF-8', $return_lengths = false){
$length1 = mb_strlen($str1, $encoding);
$length2 = mb_strlen($str2, $encoding);
if( $str1 === $str2) {
if ($return_lengths) {
return array(0, $length1, $length2);
} else {
return 0;
}
}
if ($length1 < $length2) {
return mb_levenshtein($str2, $str1, $encoding, $return_lengths);
}
if ($length1 == 0) {
if ($return_lengths) {
return array(0, $length1, $length2);
} else {
return $length2;
}
}
$prevRow = range(0, $length2);
$currentRow = array();
$str1_array = mb_str_to_array($str1, $encoding);
$str2_array = mb_str_to_array($str2, $encoding);
for ($i = 0; $i < $length1; $i++) {
$currentRow = array();
$currentRow[0] = $i + 1;
$c1 = $str1_array[$i];
for ($j = 0; $j < $length2; $j++) {
$c2 = $str2_array[$j];
$insertions = $prevRow[$j+1] + 1;
$deletions = $currentRow[$j] + 1;
$substitutions = $prevRow[$j] + (($c1 != $c2)? 1 : 0);
$currentRow[] = min($insertions, $deletions, $substitutions);
}
$prevRow = $currentRow;
}
if ($return_lengths) {
return array($prevRow[$length2], $length1, $length2);
} else {
return $prevRow[$length2];
}
}
function mb_str_to_array($str, $encoding = 'UTF-8') {
if ($encoding == 'UTF-8') {
$array = preg_split('//u', $str, null, PREG_SPLIT_NO_EMPTY);
} else {
$array = array();
for ($i = 0; $i < mb_strlen($str, $encoding); $i++) {
$array[] = mb_substr($str, $i, 1, $encoding);
}
}
return $array;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment