Skip to content

Instantly share code, notes, and snippets.

@korobochkin
Last active August 29, 2015 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save korobochkin/5db48e4777a85823e08e to your computer and use it in GitHub Desktop.
Save korobochkin/5db48e4777a85823e08e to your computer and use it in GitHub Desktop.
Additional WordPress filter for sanitizing bad file names that contain Cyrillic characters, iOS emoji and other unsafe symbols.
<?php
/**
 * Transliterates and cleans an uploaded file name so that it only contains
 * URL-safe ASCII characters.
 *
 * Processing steps, in order:
 *  1. remove_accents() (provided by WordPress) is applied to the name.
 *  2. Characters listed in the table below (Cyrillic, Greek, assorted
 *     Latin letters with diacritics, typographic quotes, ...) are replaced
 *     with their ASCII transliteration or removed.
 *  3. Whatever is still unsafe is urlencode()d and the '%' signs are
 *     stripped, leaving only the hexadecimal byte codes.
 *  4. The base name (without extension) is truncated to 130 bytes and the
 *     extension, if there is one, is appended again.
 *
 * @param string $name          File name to sanitize.
 * @param string $original_name Second argument of the 'sanitize_file_name'
 *                              filter; accepted for signature compatibility
 *                              and not used.
 * @return string Sanitized file name.
 */
function selena_network_sanitize_file_name ($name, $original_name) {
    $chars_table = array (
        // Cyrillic alphabet
        'А' => 'a', 'Б' => 'b', 'В' => 'v', 'Г' => 'g', 'Д' => 'd',
        'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd',
        'Е' => 'e', 'Ж' => 'zh', 'З' => 'z', 'И' => 'i', 'Й' => 'j',
        'е' => 'e', 'ж' => 'zh', 'з' => 'z', 'и' => 'i', 'й' => 'j',
        'К' => 'k', 'Л' => 'l', 'М' => 'm', 'Н' => 'n', 'О' => 'o',
        'к' => 'k', 'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o',
        'П' => 'p', 'Р' => 'r', 'С' => 's', 'Т' => 't', 'У' => 'u',
        'п' => 'p', 'р' => 'r', 'с' => 's', 'т' => 't', 'у' => 'u',
        'Ф' => 'f', 'Х' => 'h', 'Ц' => 'c', 'Ч' => 'ch', 'Ш' => 'sh',
        'ф' => 'f', 'х' => 'h', 'ц' => 'c', 'ч' => 'ch', 'ш' => 'sh',
        'Щ' => 'shch', 'Ь' => '', 'Ю' => 'ju', 'Я' => 'ja',
        'щ' => 'shch', 'ь' => '', 'ю' => 'ju', 'я' => 'ja',
        // Ukrainian
        'Ґ' => 'g', 'Є' => 'ye', 'І' => 'i', 'Ї' => 'yi',
        'ґ' => 'g', 'є' => 'ye', 'і' => 'i', 'ї' => 'yi',
        // Russian
        'Ё' => 'yo', 'Ы' => 'y', 'Ъ' => '', 'Э' => 'e',
        'ё' => 'yo', 'ы' => 'y', 'ъ' => '', 'э' => 'e',
        // Belarusian
        'Ў' => 'u',
        'ў' => 'u',
        // German
        'Ä' => 'ae', 'Ö' => 'oe', 'Ü' => 'ue', 'ß' => 'ss',
        'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue',
        // Polish
        'Ą' => 'a', 'Ć' => 'c', 'Ę' => 'e', 'Ł' => 'l', 'Ń' => 'n',
        'ą' => 'a', 'ć' => 'c', 'ę' => 'e', 'ł' => 'l', 'ń' => 'n',
        'Ó' => 'o', 'Ś' => 's', 'Ź' => 'z', 'Ż' => 'z',
        'ó' => 'o', 'ś' => 's', 'ź' => 'z', 'ż' => 'z',
        // Hungarian
        'Ő' => 'o', 'Ű' => 'u',
        'ő' => 'o', 'ű' => 'u',
        // Czech
        'Ě' => 'e', 'Š' => 's', 'Č' => 'c', 'Ř' => 'r', 'Ž' => 'z',
        'ě' => 'e', 'š' => 's', 'č' => 'c', 'ř' => 'r', 'ž' => 'z',
        'Ý' => 'y', 'Á' => 'a', 'É' => 'e', 'Ď' => 'd', 'Ť' => 't',
        'ý' => 'y', 'á' => 'a', 'é' => 'e', 'ď' => 'd', 'ť' => 't',
        'Ň' => 'n', 'Ú' => 'u', 'Ů' => 'u',
        'ň' => 'n', 'ú' => 'u', 'ů' => 'u',
        // Greek alphabet & modern polytonic characters
        'Α' => 'a', 'Β' => 'v', 'Γ' => 'g', 'Δ' => 'd', 'Ε' => 'e',
        'α' => 'a', 'β' => 'v', 'γ' => 'g', 'δ' => 'd', 'ε' => 'e',
        'Ζ' => 'z', 'Η' => 'i', 'Θ' => 'th', 'Ι' => 'i', 'Κ' => 'k',
        'ζ' => 'z', 'η' => 'i', 'θ' => 'th', 'ι' => 'i', 'κ' => 'k',
        'Λ' => 'l', 'Μ' => 'm', 'Ν' => 'n', 'Ξ' => 'x', 'Ο' => 'o',
        'λ' => 'l', 'μ' => 'm', 'ν' => 'n', 'ξ' => 'x', 'ο' => 'o',
        'Π' => 'p', 'Ρ' => 'r', 'Σ' => 's', 'Τ' => 't', 'Υ' => 'y',
        'π' => 'p', 'ρ' => 'r', 'σ' => 's', 'τ' => 't', 'υ' => 'y',
        'Φ' => 'f', 'Χ' => 'ch', 'Ψ' => 'ps', 'Ω' => 'o', 'Ά' => 'a',
        'φ' => 'f', 'χ' => 'ch', 'ψ' => 'ps', 'ω' => 'o', 'ά' => 'a',
        'Έ' => 'e', 'Ή' => 'i', 'Ί' => 'i', 'Ό' => 'o', 'Ύ' => 'y',
        'έ' => 'e', 'ή' => 'i', 'ί' => 'i', 'ό' => 'o', 'ύ' => 'y',
        'Ώ' => 'o', 'Ϊ' => 'i', 'Ϋ' => 'y',
        'ώ' => 'o', 'ς' => 's', 'ΐ' => 'i', 'ϊ' => 'i', 'ϋ' => 'y', 'ΰ' => 'y',
        // Extra all (http://www.atm.ox.ac.uk/user/iwi/charmap.html)
        'À' => 'a', 'Á' => 'a', 'Â' => 'a', 'Ã' => 'a', 'Å' => 'a',
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'å' => 'a',
        'Æ' => 'ae', 'Ç' => 'c', 'È' => 'e', 'É' => 'e', 'Ê' => 'e',
        'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e', 'ê' => 'e',
        'Ë' => 'e', 'Ì' => 'i', 'Í' => 'i', 'Î' => 'i', 'Ï' => 'i',
        'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'Ð' => 'd', 'Ñ' => 'n', 'Ò' => 'o', 'Ô' => 'o', 'Õ' => 'o',
        'ð' => 'd', 'ñ' => 'n', 'ò' => 'o', 'ô' => 'o', 'õ' => 'o',
        '×' => 'x', 'Ø' => 'o', 'Ù' => 'u', 'Ú' => 'u', 'Û' => 'u',
        '×' => 'x', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u', 'û' => 'u',
        'Þ' => 'p', 'Ÿ' => 'y',
        'þ' => 'p', 'ÿ' => 'y',
        // Other
        '№' => '', '“' => '', '”' => '', '«' => '', '»' => '',
        '„' => '', '@' => '', '%' => '', '‘' => '', '’' => '',
        '`' => '', '´' => '', 'º' => 'o', 'ª' => 'a',
    );

    $name = remove_accents ($name);
    $name = strtr ($name, $chars_table);

    // Anything the table did not cover (emoji, other scripts, ...) is
    // percent-encoded; dropping the '%' afterwards leaves plain hex digits.
    $name = urlencode ($name);
    $name = str_replace (array ('%'), '', $name);

    /*
     * File name length check.
     * UNIX supports at most 255 characters in a file name.
     * The base name is cut to 130 characters, as on wordpress.com, because
     * WordPress appends extra characters to file names, for example
     * image-1000x1000.jpg (so half of the possible length is reserved).
     * At this point the name is pure ASCII, so the byte-wise substr() cannot
     * split a multi-byte character.
     */
    $info = pathinfo ($name);
    $base = isset ($info ['filename']) ? $info ['filename'] : '';
    $name = substr ($base, 0, 130);

    // pathinfo() omits the 'extension' key when the name contains no dot.
    // Append the extension only when it exists, so extension-less names do
    // not trigger an undefined-index notice or gain a trailing '.'.
    if (isset ($info ['extension'])) {
        $name .= '.' . $info ['extension'];
    }

    return $name;
}
// Hook the sanitizer into WordPress' 'sanitize_file_name' filter with
// priority 99, declaring that the callback accepts 2 arguments.
add_filter (
    'sanitize_file_name',
    'selena_network_sanitize_file_name',
    99,
    2
);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment