Skip to content

Instantly share code, notes, and snippets.

@tot-ra
Created March 18, 2013 23:51
Show Gist options
  • Save tot-ra/5192145 to your computer and use it in GitHub Desktop.
Save tot-ra/5192145 to your computer and use it in GitHub Desktop.
UTF-8 / Windows-1251 conversion
/**
 * Convert a Windows-1251 byte string to UTF-8.
 *
 * Only ASCII (0-127), the letters Ё (168) and ё (184), and the contiguous
 * Cyrillic range 192-255 are mapped. Every other byte in 128-191 has no
 * branch below and is therefore dropped from the output.
 *
 * @param string $s Windows-1251 encoded bytes.
 * @return string The UTF-8 encoded result.
 */
function win1251_utf8($s)
{
    $t = '';
    for ($i = 0, $m = strlen($s); $i < $m; $i++) {
        $c = ord($s[$i]);

        if ($c <= 127) {
            // ASCII bytes are identical in both encodings.
            $t .= chr($c);
        } elseif ($c >= 192 && $c <= 239) {
            // 0xC0..0xEF -> U+0410..U+043F: lead byte 0xD0, trail 0x90..0xBF.
            $t .= chr(208) . chr($c - 48);
        } elseif ($c >= 240) {
            // 0xF0..0xFF -> U+0440..U+044F: lead byte 0xD1, trail 0x80..0x8F.
            $t .= chr(209) . chr($c - 112);
        } elseif ($c == 184) {
            // ё (U+0451) is D1 91 in UTF-8; the trail byte must be 145
            // (0x91), not 209, otherwise the output is not valid UTF-8.
            $t .= chr(209) . chr(145);
        } elseif ($c == 168) {
            // Ё (U+0401) is D0 81 in UTF-8.
            $t .= chr(208) . chr(129);
        }
        // Any other byte (128-191 except 168/184) is intentionally skipped.
    }
    return $t;
}
/**
 * Convert a UTF-8 string to Windows-1251.
 *
 * Only 2-byte UTF-8 sequences (lead byte 110xxxxx) are decoded:
 *   - U+0401 (Ё) and U+0451 (ё) map to bytes 168 and 184;
 *   - U+0410..U+044F (А..я) map to bytes 192..255;
 *   - any other 2-byte code point is emitted as a hexadecimal numeric
 *     character reference of the form "&#xHHHH;".
 * Every other byte (ASCII, bytes of 3/4-byte sequences, stray continuation
 * bytes) is copied to the output unchanged. A 2-byte lead byte that is not
 * followed by a continuation byte (10xxxxxx) is also copied unchanged
 * instead of being decoded together with whatever follows it.
 *
 * @param string $content UTF-8 encoded input.
 * @return string The converted byte string.
 */
function utf2win1251($content)
{
    $newcontent = "";
    // Measure once; the input is not modified inside the loop.
    $length = strlen($content);

    for ($i = 0; $i < $length; $i++) {
        $c1 = substr($content, $i, 1);
        $byte1 = ord($c1);

        // A 2-byte sequence needs a 110xxxxx lead followed by a 10xxxxxx trail.
        $isPair = false;
        if (($byte1 >> 5) == 6 && $i + 1 < $length) {
            $byte2 = ord(substr($content, $i + 1, 1));
            $isPair = (($byte2 >> 6) == 2);
        }

        if (!$isPair) {
            $newcontent .= $c1;
            continue;
        }

        // Consume the trail byte and assemble the 11-bit code point.
        $i++;
        $word = (($byte1 & 31) << 6) | ($byte2 & 63);

        if ($word == 1025) {
            $newcontent .= chr(168);            // Ё
        } elseif ($word == 1105) {
            $newcontent .= chr(184);            // ё
        } elseif ($word >= 0x0410 && $word <= 0x044F) {
            $newcontent .= chr($word - 848);    // А..я -> 192..255
        } else {
            // No single-byte mapping here: emit a numeric character
            // reference with four lowercase hex digits. The "&#x" prefix
            // is required for the reference to be well-formed.
            $newcontent .= "&#x" . str_pad(dechex($word), 4, "0", STR_PAD_LEFT) . ";";
        }
    }
    return $newcontent;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment