Skip to content

Instantly share code, notes, and snippets.

@ineersa
Created December 1, 2014 13:56
Show Gist options
  • Save ineersa/db90c06c9d2a5ed3a1ec to your computer and use it in GitHub Desktop.
Save ineersa/db90c06c9d2a5ed3a1ec to your computer and use it in GitHub Desktop.
<?php
/**
* Class Encoding
* Usage:
* $text = Encoding::UTF8FixWin1252Chars($text);
* $text = Encoding::replaceBroken($text);
*/
class Encoding {
protected static $brokenList = array(
"€"=>"€", "À"=>"À",
"‚"=>"‚", "Á"=>"Ã",
"ƒ"=>"Æ’", "Â"=>"Â",
"„"=>"„", "Ã"=>"Ã",
"…"=>"…", "Ä"=>"Ä",
"†"=>"â€", "Å"=>"Ã…",
"‡"=>"‡", "Æ"=>"Æ",
"ˆ"=>"ˆ", "Ç"=>"Ç",
"‰"=>"‰", "È"=>"È",
"Š"=>"Å", "É"=>"É",
"‹"=>"‹", "Ê"=>"Ê",
"Œ"=>"Å’", "Ë"=>"Ë",
"Ž"=>"Ž", "Ì"=>"ÃŒ",
"‘"=>"‘", "Í"=>"Ã",
"’"=>"’", "Î"=>"ÃŽ",
"“"=>"“", "Ï"=>"Ã",
"”"=>"â€", "Ð"=>"Ã",
"•"=>"•", "Ñ"=>"Ñ",
"–"=>"–", "Ò"=>"Ã’",
"—"=>"—", "Ó"=>"Ó",
"˜"=>"Ëœ", "Ô"=>"Ô",
"™"=>"â„¢", "Õ"=>"Õ",
"š"=>"Å¡", "Ö"=>"Ö",
"›"=>"›", "×"=>"×",
"œ"=>"Å“", "Ø"=>"Ø",
"ž"=>"ž", "Ù"=>"Ù",
"Ÿ"=>"Ÿ", "Ú"=>"Ú",
""=>"Â", "Û"=>"Û",
"¡"=>"¡", "Ü"=>"Ãœ",
"¢"=>"¢", "Ý"=>"Ã",
"£"=>"£", "Þ"=>"Þ",
"¤"=>"¤", "ß"=>"ß",
"¥"=>"Â¥", "à"=>"Ã",
"¦"=>"¦", "á"=>"á",
"§"=>"§", "â"=>"â",
"¨"=>"¨", "ã"=>"ã",
"©"=>"©", "ä"=>"ä",
"ª"=>"ª", "å"=>"Ã¥",
"«"=>"«", "æ"=>"æ",
"¬"=>"¬", "ç"=>"ç",
""=>"­", "è"=>"è",
"®"=>"®", "é"=>"é",
"¯"=>"¯", "ê"=>"ê",
"°"=>"°", "ë"=>"ë",
"±"=>"±", "ì"=>"ì",
"²"=>"²", "í"=>"í",
"³"=>"³", "î"=>"î",
"´"=>"´", "ï"=>"ï",
"µ"=>"µ", "ð"=>"ð",
"¶"=>"¶", "ñ"=>"ñ",
"·"=>"·", "ò"=>"ò",
"¸"=>"¸", "ó"=>"ó",
"¹"=>"¹", "ô"=>"ô",
"º"=>"º", "õ"=>"õ",
"»"=>"»", "ö"=>"ö",
"¼"=>"¼", "÷"=>"÷",
"½"=>"½", "ø"=>"ø",
"¾"=>"¾", "ù"=>"ù",
"¿"=>"¿", "ú"=>"ú",
"û"=>"û", "ü"=>"ü",
"ý"=>"ý", "þ"=>"þ",
"ÿ"=>"ÿ"
);
/**
 * Byte-level replacement table consumed by UTF8FixWin1252Chars().
 *
 * Each key is the two-byte UTF-8 encoding of a code point in the
 * U+0080..U+009F range ("\xc2\x80" .. "\xc2\x9f"). Each value is the UTF-8
 * encoding of the printable character that Windows-1252 assigns to the same
 * byte value (for example 0x80 => "\xe2\x82\xac", the euro sign).
 *
 * The table has no entries for 0x81, 0x8d, 0x8f, 0x90 and 0x9d, so those
 * sequences are left as they are.
 */
protected static $brokenUtf8ToUtf8 = array(
"\xc2\x80" => "\xe2\x82\xac",
"\xc2\x82" => "\xe2\x80\x9a",
"\xc2\x83" => "\xc6\x92",
"\xc2\x84" => "\xe2\x80\x9e",
"\xc2\x85" => "\xe2\x80\xa6",
"\xc2\x86" => "\xe2\x80\xa0",
"\xc2\x87" => "\xe2\x80\xa1",
"\xc2\x88" => "\xcb\x86",
"\xc2\x89" => "\xe2\x80\xb0",
"\xc2\x8a" => "\xc5\xa0",
"\xc2\x8b" => "\xe2\x80\xb9",
"\xc2\x8c" => "\xc5\x92",
"\xc2\x8e" => "\xc5\xbd",
"\xc2\x91" => "\xe2\x80\x98",
"\xc2\x92" => "\xe2\x80\x99",
"\xc2\x93" => "\xe2\x80\x9c",
"\xc2\x94" => "\xe2\x80\x9d",
"\xc2\x95" => "\xe2\x80\xa2",
"\xc2\x96" => "\xe2\x80\x93",
"\xc2\x97" => "\xe2\x80\x94",
"\xc2\x98" => "\xcb\x9c",
"\xc2\x99" => "\xe2\x84\xa2",
"\xc2\x9a" => "\xc5\xa1",
"\xc2\x9b" => "\xe2\x80\xba",
"\xc2\x9c" => "\xc5\x93",
"\xc2\x9e" => "\xc5\xbe",
"\xc2\x9f" => "\xc5\xb8"
);
/**
 * Substitutes the garbled sequences listed in self::$brokenList with the
 * characters they stand for.
 *
 * In self::$brokenList the key is the intended character and the value is
 * the garbled sequence to look for. The table is walked three times: first
 * applying only sequences that are 3 characters long (counted as UTF-8),
 * then those of 2 characters, then those of 1 character. Sequences of any
 * other length are never applied.
 *
 * NOTE(review): the longest-first order presumably keeps a short sequence
 * from consuming part of a longer one - confirm against the table contents.
 *
 * @param string $text Text that may contain garbled sequences.
 * @return string The text after all replacements.
 */
static function replaceBroken($text)
{
    $fixed = $text;
    foreach (array(3, 2, 1) as $wantedLength) {
        foreach (self::$brokenList as $proper => $garbled) {
            if (mb_strlen($garbled, "UTF-8") != $wantedLength) {
                continue;
            }
            $fixed = str_replace($garbled, $proper, $fixed);
        }
    }
    return $fixed;
}
/**
 * Replaces every occurrence of a key of self::$brokenUtf8ToUtf8 found in
 * $text with the value mapped to that key.
 *
 * @param string|array $text Subject handed straight to str_replace().
 * @return string|array Whatever str_replace() returns for that subject.
 */
static function UTF8FixWin1252Chars($text){
    $needles = array_keys(self::$brokenUtf8ToUtf8);
    $substitutes = array_values(self::$brokenUtf8ToUtf8);
    return str_replace($needles, $substitutes, $text);
}
}
@tamanti
Copy link

tamanti commented Feb 26, 2018

I also had to add after line 86:
"\xc2\x8d" => "\xc3\xac",

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment