Skip to content

Instantly share code, notes, and snippets.

@MihanEntalpo
Created May 24, 2014 19:59
Show Gist options
  • Save MihanEntalpo/19ba1b946484176d1ec9 to your computer and use it in GitHub Desktop.
Save MihanEntalpo/19ba1b946484176d1ec9 to your computer and use it in GitHub Desktop.
fuzzy_compare
<?php
/**
 * Fuzzy string comparison based on shared substrings.
 *
 * The longer of the two strings is cut into every possible chunk of each
 * length from $minSize up to and including $maxSize (sliding one character
 * at a time). The score is the fraction of those chunks that also occur
 * somewhere in the shorter string.
 *
 * @param string $str1     First string.
 * @param string $str2     Second string.
 * @param int    $minSize  Smallest chunk length to compare (values below 1 are treated as 1).
 * @param int    $maxSize  Largest chunk length to compare, inclusive. 0 means "choose
 *                         automatically" (the length of the shorter string). It is always
 *                         capped at the length of the shorter string.
 * @param string $encoding Text encoding passed to the mb_* functions (usually utf-8).
 *
 * @return float Similarity in the range 0.0 .. 1.0. Returns 0.0 when no chunk could be
 *               compared at all (e.g. one string is empty or shorter than $minSize).
 */
function fuzzyCompare($str1, $str2, $minSize = 2, $maxSize = 4, $encoding = 'utf-8')
{
    $len1 = mb_strlen($str1, $encoding);
    $len2 = mb_strlen($str2, $encoding);
    $shorterLen = min($len1, $len2);

    // An empty chunk would "match" everywhere (or trigger a warning on older
    // PHP versions), so the smallest meaningful chunk is one character.
    $minSize = max(1, (int) $minSize);

    // 0 requests automatic sizing; in every case a chunk cannot be longer
    // than the shorter string, otherwise it could never be found in it.
    $maxSize = (int) $maxSize;
    if ($maxSize === 0) {
        $maxSize = $shorterLen;
    }
    $maxSize = min($maxSize, $shorterLen);

    // Chunks are taken from the longer string and searched for in the shorter one.
    // On equal lengths the second string is the chunk source.
    if ($len1 > $len2) {
        $source = $str1;
        $haystack = $str2;
        $sourceLen = $len1;
    } else {
        $source = $str2;
        $haystack = $str1;
        $sourceLen = $len2;
    }

    $total = 0;
    $matched = 0;

    // The upper bound is inclusive: $maxSize is the largest chunk length that
    // is actually compared. (An exclusive bound made short identical strings
    // score 0 because no chunk was ever examined.)
    for ($size = $minSize; $size <= $maxSize; $size++) {
        $windowCount = $sourceLen - $size + 1;
        for ($offset = 0; $offset < $windowCount; $offset++) {
            $total++;
            $chunk = mb_substr($source, $offset, $size, $encoding);
            if (mb_strpos($haystack, $chunk, 0, $encoding) !== false) {
                $matched++;
            }
        }
    }

    if ($total === 0) {
        return 0.0;
    }

    // Force float division so the result type does not depend on divisibility.
    return (float) $matched / $total;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment