Skip to content

Instantly share code, notes, and snippets.

@PEKTOP
Last active August 29, 2015 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PEKTOP/f22d21aac6f5815df80b to your computer and use it in GitHub Desktop.
Save PEKTOP/f22d21aac6f5815df80b to your computer and use it in GitHub Desktop.
Slug
<?php
class Slug
{
/**
* @param string $string
* @return string
*/
public function make($string)
{
if (is_string($string)) {
return $this->sanitize($this->ruToLat($string));
}
return null;
}
/**
* @param string $str
* @param string $schema
* @return string
*/
protected function ruToLat($str, $schema = 'gost')
{
$str = mb_strtolower($str, 'UTF-8');
$map = [
'passport' => [
'а' => 'a',
'б' => 'b',
'в' => 'v',
'г' => 'g',
'д' => 'd',
'е' => 'e',
'ё' => 'e',
'ж' => 'zh',
'з' => 'z',
'и' => 'i',
'й' => 'i',
'к' => 'k',
'л' => 'l',
'м' => 'm',
'н' => 'n',
'о' => 'o',
'п' => 'p',
'р' => 'r',
'с' => 's',
'т' => 't',
'у' => 'u',
'ф' => 'f',
'х' => 'kh',
'ц' => 'tc',
'ч' => 'ch',
'ш' => 'sh',
'щ' => 'shch',
'ъ' => '',
'ь' => '',
'ы' => 'y',
'э' => 'e',
'ю' => 'iu',
'я' => 'ia',
],
'gost' => [
'а' => 'a',
'б' => 'b',
'в' => 'v',
'г' => 'g',
'д' => 'd',
'е' => 'e',
'ё' => 'yo',
'ж' => 'zh',
'з' => 'z',
'и' => 'i',
'й' => 'j',
'к' => 'k',
'л' => 'l',
'м' => 'm',
'н' => 'n',
'о' => 'o',
'п' => 'p',
'р' => 'r',
'с' => 's',
'т' => 't',
'у' => 'u',
'ф' => 'f',
'х' => 'x',
'ц' => 'c',
'ч' => 'ch',
'ш' => 'sh',
'щ' => 'shh',
'ъ' => '',
'ь' => '',
'ы' => 'y',
'э' => 'e',
'ю' => 'yu',
'я' => 'ya',
]
];
return strtr($str, $map[$schema]);
}
/**
* Sanitizes a title, replacing whitespace and a few other characters with dashes.
*
* Limits the output to alphanumeric characters, underscore (_) and dash (-).
* Whitespace becomes a dash.
*
* @param string $title The title to be sanitized.
* @return string The sanitized title.
*/
protected function sanitize($title) {
$title = strip_tags($title);
// Preserve escaped octets.
$title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title);
// Remove percent signs that are not part of an octet.
$title = str_replace('%', '', $title);
// Restore octets.
$title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title);
if ($this->seems_utf8($title)) {
if (function_exists('mb_strtolower')) {
$title = mb_strtolower($title, 'UTF-8');
}
$title = $this->utf8_uri_encode($title, 200);
}
$title = preg_replace('/&.+?;/', '', $title); // kill entities
$title = str_replace('.', '-', $title);
// Convert nbsp, ndash and mdash to hyphens
$title = str_replace( [ '%c2%a0', '%e2%80%93', '%e2%80%94' ], '-', $title );
// Strip these characters entirely
$title = str_replace( [
// iexcl and iquest
'%c2%a1', '%c2%bf',
// angle quotes
'%c2%ab', '%c2%bb', '%e2%80%b9', '%e2%80%ba',
// curly quotes
'%e2%80%98', '%e2%80%99', '%e2%80%9c', '%e2%80%9d',
'%e2%80%9a', '%e2%80%9b', '%e2%80%9e', '%e2%80%9f',
// copy, reg, deg, hellip and trade
'%c2%a9', '%c2%ae', '%c2%b0', '%e2%80%a6', '%e2%84%a2',
// acute accents
'%c2%b4', '%cb%8a', '%cc%81', '%cd%81',
// grave accent, macron, caron
'%cc%80', '%cc%84', '%cc%8c',
// №
'%e2%84%96',
], '', $title );
// Convert times to x
$title = str_replace( '%c3%97', 'x', $title );
$title = preg_replace('/[^%a-z0-9 _-]/', '', $title);
$title = preg_replace('/\s+/', '-', $title);
$title = preg_replace('|-+|', '-', $title);
$title = trim($title, '-');
return $title;
}
/**
* Checks to see if a string is utf8 encoded.
*
* NOTE: This function checks for 5-Byte sequences, UTF8
* has Bytes Sequences with a maximum length of 4.
*
* @param string $str The string to be checked
* @return bool True if $str fits a UTF-8 model, false otherwise.
*/
protected function seems_utf8($str) {
$length = strlen($str);
for ($i=0; $i < $length; $i++) {
$c = ord($str[$i]);
if ($c < 0x80) $n = 0; # 0bbbbbbb
elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb
elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb
elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb
elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb
elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b
else return false; # Does not match any model
for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80))
return false;
}
}
return true;
}
/**
* Encode the Unicode values to be used in the URI.
*
* @param string $utf8_string
* @param int $length Max length of the string
* @return string String with Unicode encoded for URI.
*/
protected function utf8_uri_encode( $utf8_string, $length = 0 ) {
$unicode = '';
$values = [];
$num_octets = 1;
$unicode_length = 0;
$string_length = strlen( $utf8_string );
for ($i = 0; $i < $string_length; $i++ ) {
$value = ord( $utf8_string[ $i ] );
if ( $value < 128 ) {
if ( $length && ( $unicode_length >= $length ) )
break;
$unicode .= chr($value);
$unicode_length++;
} else {
if ( count( $values ) == 0 ) $num_octets = ( $value < 224 ) ? 2 : 3;
$values[] = $value;
if ( $length && ( $unicode_length + ($num_octets * 3) ) > $length )
break;
if ( count( $values ) == $num_octets ) {
if ($num_octets == 3) {
$unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]) . '%' . dechex($values[2]);
$unicode_length += 9;
} else {
$unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]);
$unicode_length += 6;
}
$values = [];
$num_octets = 1;
}
}
}
return $unicode;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment