Skip to content

Instantly share code, notes, and snippets.

@diegopso
Created June 9, 2014 20:16
Show Gist options
  • Save diegopso/64ffa35c4d7f515f24a3 to your computer and use it in GitHub Desktop.
Save diegopso/64ffa35c4d7f515f24a3 to your computer and use it in GitHub Desktop.
Script para canonizar palavras em português
<?php
/**
 * Produces the canonical form of a piece of text.
 *
 * The text is first turned into a slug by slugify() and the resulting
 * slug is then normalised by stem().
 *
 * @param string $str Text to canonize.
 * @return string The stemmed slug.
 */
function canonize($str)
{
    return stem(slugify($str));
}
/**
 * Applies a fixed list of rewrite rules to a hyphen-separated slug.
 *
 * The rules drop the connectives "de", "do", "da" and "em" when they sit
 * between two hyphens, then rewrite word endings (a word ends at a hyphen or
 * at the end of the string): "oes" becomes "ao", "is" becomes "l", and a
 * remaining final "s" is removed.
 *
 * @param string $str Hyphen-separated slug.
 * @return string The slug after all rules have been applied.
 */
function stem($str)
{
    // Order matters: every rule operates on the output of the previous one,
    // so the specific endings ("oes", "is") must run before the bare "s" rule.
    $rules = [
        ['@-de-@', '-'],
        ['@-do-@', '-'],
        ['@-da-@', '-'],
        ['@-em-@', '-'],
        ['@oes-@', 'ao-'],
        ['@oes$@', 'ao'],
        ['@is-@', 'l-'],
        ['@is$@', 'l'],
        ['@s-@', '-'],
        ['@s$@', ''],
    ];

    foreach ($rules as $rule) {
        $str = preg_replace($rule[0], $rule[1], $str);
    }

    return $str;
}
/**
 * Converts text into a lowercase, hyphen-separated ASCII slug.
 *
 * By miguelSantirso <http://sourcecookbook.com/en/recipes/8/function-to-slugify-strings-in-php>
 *
 * Steps: runs of characters that are neither letters nor digits become a
 * single hyphen; the text is transliterated to ASCII when iconv is available;
 * it is lowercased; anything outside [a-z0-9_-] is dropped; hyphens are
 * collapsed and trimmed.
 *
 * @param string $text Text to slugify; expected to be valid UTF-8.
 * @return string The slug, or 'n-a' when no usable character remains.
 */
function slugify($text)
{
    // Every run of non-letter, non-digit characters becomes one hyphen.
    $text = preg_replace('~[^\\pL\d]+~u', '-', $text);
    if ($text === null) {
        // preg_replace() returns null on failure, e.g. malformed UTF-8 under
        // the /u modifier. There is nothing to build a slug from.
        return 'n-a';
    }
    $text = trim($text, '-');

    if (function_exists('iconv')) {
        // iconv() returns false when it cannot convert the input. In that case
        // keep the untransliterated text: the ASCII filter below still removes
        // every non-ASCII byte, so the ASCII part of the slug survives.
        $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
        if ($ascii !== false) {
            $text = $ascii;
        }
    }

    // Name the encoding explicitly so the result does not depend on the
    // configured mbstring internal encoding.
    $text = mb_strtolower($text, 'UTF-8');

    // Explicit ASCII class: independent of locale-specific \w tables.
    $text = preg_replace('~[^-a-z0-9_]+~', '', $text);

    // Stripping characters between two hyphens (or next to an edge) can leave
    // doubled or dangling hyphens; tidy them up.
    $text = trim(preg_replace('~-{2,}~', '-', $text), '-');

    // Strict comparison: empty() would also reject the valid slug "0".
    if ($text === '') {
        return 'n-a';
    }

    return $text;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment