Created
December 1, 2014 13:56
-
-
Save ineersa/db90c06c9d2a5ed3a1ec to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Class Encoding | |
* Usage: | |
* $text = Encoding::UTF8FixWin1252Chars($text); | |
* $text = Encoding::replaceBroken($text); | |
*/ | |
class Encoding { | |
protected static $brokenList = array( | |
"€"=>"€", "À"=>"À", | |
"‚"=>"‚", "Á"=>"Ã", | |
"ƒ"=>"Æ’", "Â"=>"Â", | |
"„"=>"„", "Ã"=>"Ã", | |
"…"=>"…", "Ä"=>"Ä", | |
"†"=>"â€", "Å"=>"Ã…", | |
"‡"=>"‡", "Æ"=>"Æ", | |
"ˆ"=>"ˆ", "Ç"=>"Ç", | |
"‰"=>"‰", "È"=>"È", | |
"Š"=>"Å", "É"=>"É", | |
"‹"=>"‹", "Ê"=>"Ê", | |
"Œ"=>"Å’", "Ë"=>"Ë", | |
"Ž"=>"Ž", "Ì"=>"ÃŒ", | |
"‘"=>"‘", "Í"=>"Ã", | |
"’"=>"’", "Î"=>"ÃŽ", | |
"“"=>"“", "Ï"=>"Ã", | |
"”"=>"â€", "Ð"=>"Ã", | |
"•"=>"•", "Ñ"=>"Ñ", | |
"–"=>"–", "Ò"=>"Ã’", | |
"—"=>"—", "Ó"=>"Ó", | |
"˜"=>"Ëœ", "Ô"=>"Ô", | |
"™"=>"â„¢", "Õ"=>"Õ", | |
"š"=>"Å¡", "Ö"=>"Ö", | |
"›"=>"›", "×"=>"×", | |
"œ"=>"Å“", "Ø"=>"Ø", | |
"ž"=>"ž", "Ù"=>"Ù", | |
"Ÿ"=>"Ÿ", "Ú"=>"Ú", | |
""=>"Â", "Û"=>"Û", | |
"¡"=>"¡", "Ü"=>"Ãœ", | |
"¢"=>"¢", "Ý"=>"Ã", | |
"£"=>"£", "Þ"=>"Þ", | |
"¤"=>"¤", "ß"=>"ß", | |
"¥"=>"Â¥", "à"=>"Ã", | |
"¦"=>"¦", "á"=>"á", | |
"§"=>"§", "â"=>"â", | |
"¨"=>"¨", "ã"=>"ã", | |
"©"=>"©", "ä"=>"ä", | |
"ª"=>"ª", "å"=>"Ã¥", | |
"«"=>"«", "æ"=>"æ", | |
"¬"=>"¬", "ç"=>"ç", | |
""=>"Â", "è"=>"è", | |
"®"=>"®", "é"=>"é", | |
"¯"=>"¯", "ê"=>"ê", | |
"°"=>"°", "ë"=>"ë", | |
"±"=>"±", "ì"=>"ì", | |
"²"=>"²", "í"=>"Ã", | |
"³"=>"³", "î"=>"î", | |
"´"=>"´", "ï"=>"ï", | |
"µ"=>"µ", "ð"=>"ð", | |
"¶"=>"¶", "ñ"=>"ñ", | |
"·"=>"·", "ò"=>"ò", | |
"¸"=>"¸", "ó"=>"ó", | |
"¹"=>"¹", "ô"=>"ô", | |
"º"=>"º", "õ"=>"õ", | |
"»"=>"»", "ö"=>"ö", | |
"¼"=>"¼", "÷"=>"÷", | |
"½"=>"½", "ø"=>"ø", | |
"¾"=>"¾", "ù"=>"ù", | |
"¿"=>"¿", "ú"=>"ú", | |
"û"=>"û", "ü"=>"ü", | |
"ý"=>"ý", "þ"=>"þ", | |
"ÿ"=>"ÿ" | |
); | |
/**
 * Byte-level repair table used by UTF8FixWin1252Chars().
 *
 * Each key is the two-byte UTF-8 encoding of a code point in the
 * U+0080..U+009F range (0xC2 followed by 0x80..0x9F). Each value is the
 * UTF-8 encoding of the character that Windows-1252 assigns to that same
 * byte value (0x80 => U+20AC euro sign, 0x99 => U+2122 trade mark, ...).
 *
 * There is no entry for 0x81, 0x8D, 0x8F, 0x90 or 0x9D, so those
 * sequences are left unchanged by the replacement.
 *
 * All strings are written as hex escapes so the table does not depend on
 * the encoding the source file is saved in.
 */
protected static $brokenUtf8ToUtf8 = array( | |
"\xc2\x80" => "\xe2\x82\xac", | |
"\xc2\x82" => "\xe2\x80\x9a", | |
"\xc2\x83" => "\xc6\x92", | |
"\xc2\x84" => "\xe2\x80\x9e", | |
"\xc2\x85" => "\xe2\x80\xa6", | |
"\xc2\x86" => "\xe2\x80\xa0", | |
"\xc2\x87" => "\xe2\x80\xa1", | |
"\xc2\x88" => "\xcb\x86", | |
"\xc2\x89" => "\xe2\x80\xb0", | |
"\xc2\x8a" => "\xc5\xa0", | |
"\xc2\x8b" => "\xe2\x80\xb9", | |
"\xc2\x8c" => "\xc5\x92", | |
"\xc2\x8e" => "\xc5\xbd", | |
"\xc2\x91" => "\xe2\x80\x98", | |
"\xc2\x92" => "\xe2\x80\x99", | |
"\xc2\x93" => "\xe2\x80\x9c", | |
"\xc2\x94" => "\xe2\x80\x9d", | |
"\xc2\x95" => "\xe2\x80\xa2", | |
"\xc2\x96" => "\xe2\x80\x93", | |
"\xc2\x97" => "\xe2\x80\x94", | |
"\xc2\x98" => "\xcb\x9c", | |
"\xc2\x99" => "\xe2\x84\xa2", | |
"\xc2\x9a" => "\xc5\xa1", | |
"\xc2\x9b" => "\xe2\x80\xba", | |
"\xc2\x9c" => "\xc5\x93", | |
"\xc2\x9e" => "\xc5\xbe", | |
"\xc2\x9f" => "\xc5\xb8" | |
); | |
/**
 * Replaces the garbled sequences listed in self::$brokenList with the
 * characters they stand for.
 *
 * The table is keyed by the correct character and holds the garbled
 * sequence as the value. Sequences are applied in three rounds by their
 * length in UTF-8 characters: 3 first, then 2, then 1, so that a longer
 * sequence is rewritten before a shorter one contained in it can match.
 * Within a round the table's own order is kept. Entries whose garbled
 * sequence is not 1, 2 or 3 characters long are never applied.
 *
 * @param mixed $text Text to repair; handed to str_replace() as the subject.
 * @return mixed The subject after all replacements have been applied.
 */
static function replaceBroken($text)
{
    $result = $text;
    foreach (array(3, 2, 1) as $wantedLength) {
        foreach (self::$brokenList as $correct => $garbled) {
            if (mb_strlen($garbled, "UTF-8") != $wantedLength) {
                continue;
            }
            // Each replacement works on the output of the previous one.
            $result = str_replace($garbled, $correct, $result);
        }
    }
    return $result;
}
/**
 * Rewrites every byte sequence that appears as a key of
 * self::$brokenUtf8ToUtf8 into the sequence stored as its value.
 *
 * The replacement is a plain byte-wise str_replace(): pairs are applied
 * one after another in table order, and anything that is not a key of
 * the table is returned untouched.
 *
 * @param mixed $text Text to repair; handed to str_replace() as the subject.
 * @return mixed The subject with all table entries substituted.
 */
static function UTF8FixWin1252Chars($text)
{
    $table = self::$brokenUtf8ToUtf8;
    $misdecoded = array_keys($table);
    $intended = array_values($table);

    return str_replace($misdecoded, $intended, $text);
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I also had to add after line 86:
"\xc2\x8d" => "\xc3\xac",