-
-
Save VaLeXaR/5c9eff15a4e90d1e4ef1ef3410c4e1df to your computer and use it in GitHub Desktop.
Multibyte Similar Text for PHP, mb_similar_text()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/*
 * Solves the problem described at http://stackoverflow.com/questions/31002690/how-to-use-similar-text-php-code-in-arabic.
 * I do not know Arabic, but Norwegian also has multibyte characters: æøåÆØÅ
 */
//from http://www.phperz.com/article/14/1029/31806.html | |
/**
 * Splits a UTF-8 string into a list of its individual characters.
 *
 * The `u` modifier makes each match a whole UTF-8 character instead of a
 * single byte; the `s` modifier lets `.` match newline characters as well,
 * so line breaks are kept as characters rather than silently dropped.
 *
 * @param string $str UTF-8 encoded input string.
 * @return array Single-character strings in their original order. Empty
 *               when the input is empty or cannot be matched (for example
 *               malformed UTF-8).
 */
function mb_split_str($str) {
    // preg_match_all() yields 0 for "no matches" and false on failure;
    // in both cases there are no characters to hand back.
    if (!preg_match_all('/./us', $str, $arr)) {
        return [];
    }
    return $arr[0];
}
//based on http://www.phperz.com/article/14/1029/31806.html, added percent | |
/**
 * Multibyte-aware counterpart to similar_text() for UTF-8 strings.
 *
 * Similarity here is the number of DISTINCT characters that occur in both
 * strings (repeated characters are counted once), not the longest-common-
 * substring measure used by the native similar_text().
 *
 * @param string $str1    First UTF-8 string.
 * @param string $str2    Second UTF-8 string.
 * @param mixed  $percent Optional, by reference. Receives
 *                        similarity * 200 / (characters in $str1 + characters in $str2),
 *                        or 0 when both strings are empty.
 * @return int Number of distinct characters shared by both strings.
 */
function mb_similar_text($str1, $str2, &$percent = null) {
    $chars_1 = mb_split_str($str1);
    $chars_2 = mb_split_str($str2);

    // Compare the sets of distinct characters of each string.
    $similarity = count(array_intersect(array_unique($chars_2), array_unique($chars_1)));

    // Lengths are measured in characters, not bytes: strlen() would count
    // every multibyte character several times and deflate the percentage.
    $total = count($chars_1) + count($chars_2);

    // Two empty strings would otherwise divide by zero.
    $percent = $total > 0 ? ($similarity * 200) / $total : 0;

    return $similarity;
}
//demo:
//$var = mb_similar_text('عمار', 'ياسر', $per);
//output: $var = 2, $per = 50
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment