Skip to content

Instantly share code, notes, and snippets.

@luchaninov
Created February 24, 2017 19:06
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save luchaninov/a8469ce649c8428cd02be793165f260a to your computer and use it in GitHub Desktop.
Save luchaninov/a8469ce649c8428cd02be793165f260a to your computer and use it in GitHub Desktop.
Detect Cyrillic encoding (Windows-1251 vs. UTF-8) and convert to UTF-8
<?php
/**
 * Guesses whether a byte string containing Russian text is encoded as UTF-8
 * or Windows-1251 (cp1251), and converts it to UTF-8.
 *
 * Detection strategy:
 *  1. A string that is well-formed UTF-8 is reported as UTF-8.
 *  2. Otherwise the string is probed for the 64 Russian letters in both
 *     encodings; whichever encoding has more distinct letters present wins,
 *     with ties going to UTF-8.
 *
 * This source file must itself be saved as UTF-8, because the probe letters
 * below are written as string literals.
 */
class Encoding
{
    /**
     * Probe letters as UTF-8 byte sequences (lower- and upper-case Russian
     * alphabet without "ё"/"Ё").
     *
     * @var string[]
     */
    private $chars = [
        'й', 'ц', 'у', 'к', 'е', 'н', 'г', 'ш', 'щ', 'з', 'х', 'ъ', 'ф', 'ы', 'в', 'а', 'п', 'р', 'о', 'л', 'д',
        'ж', 'э', 'я', 'ч', 'с', 'м', 'и', 'т', 'ь', 'б', 'ю',
        'Й', 'Ц', 'У', 'К', 'Е', 'Н', 'Г', 'Ш', 'Щ', 'З', 'Х', 'Ъ', 'Ф', 'Ы', 'В', 'А', 'П', 'Р', 'О', 'Л', 'Д',
        'Ж', 'Э', 'Я', 'Ч', 'С', 'М', 'И', 'Т', 'Ь', 'Б', 'Ю',
    ];

    /**
     * The same probe letters converted to cp1251; filled by the constructor.
     *
     * @var string[]
     */
    private $cp1251chars = [];

    /**
     * Builds the cp1251 probe list from the UTF-8 one.
     */
    public function __construct()
    {
        foreach ($this->chars as $char) {
            $converted = iconv('utf-8', 'cp1251', $char);
            // iconv() returns false on failure; never store a non-string or
            // empty needle, since strpos() would reject or mis-handle it.
            if (is_string($converted) && $converted !== '') {
                $this->cp1251chars[] = $converted;
            }
        }
    }

    /**
     * Guesses the encoding of the given byte string.
     *
     * @param string $s Raw bytes to inspect.
     *
     * @return string Either 'utf-8' or 'cp1251'.
     */
    public function detectEncoding($s)
    {
        // Nothing to inspect: keep the historical default answer.
        if ($s === null || $s === '') {
            return 'utf-8';
        }

        // With the "u" modifier PCRE validates the whole subject as UTF-8
        // before matching, so the empty pattern only matches well-formed
        // UTF-8. This check must come first: every UTF-8 Cyrillic letter
        // starts with byte 0xD0 or 0xD1, which are the cp1251 letters "Р" and
        // "С", so counting alone misreads UTF-8 text made of letters outside
        // the probe list (e.g. "Ёё", Ukrainian "Її") as cp1251.
        if (preg_match('//u', $s) === 1) {
            return 'utf-8';
        }

        // Not valid UTF-8 (cp1251, or e.g. UTF-8 truncated mid-character):
        // fall back to comparing how many distinct probe letters are present.
        $utf8count = $this->countPresent($s, $this->chars);
        $cp1251count = $this->countPresent($s, $this->cp1251chars);

        return $cp1251count > $utf8count ? 'cp1251' : 'utf-8';
    }

    /**
     * Returns the given string as UTF-8, converting it from cp1251 when
     * detectEncoding() reports that encoding.
     *
     * @param string $s Raw bytes in UTF-8 or cp1251.
     *
     * @return string|false The input unchanged when detected as UTF-8,
     *                      otherwise the result of iconv(), which is false
     *                      when the conversion fails.
     */
    public function toUtf8($s)
    {
        $encoding = $this->detectEncoding($s);
        if ($encoding === 'utf-8') {
            return $s;
        }

        return iconv($encoding, 'utf-8//IGNORE', $s);
    }

    /**
     * Counts how many of the given byte sequences occur at least once in $s.
     *
     * @param string   $s       Haystack.
     * @param string[] $needles Non-empty byte sequences to look for.
     *
     * @return int Number of distinct needles found.
     */
    private function countPresent($s, array $needles)
    {
        $found = 0;
        foreach ($needles as $needle) {
            if (strpos($s, $needle) !== false) {
                $found++;
            }
        }

        return $found;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment