Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Repair UTF-8 strings that contain UTF-8 characters mis-decoded as ISO-8859-1 (Latin-1 / Windows-1252)
# Repairs "mojibake": UTF-8 text whose multi-byte characters were decoded one
# byte at a time as Windows-1252 and then re-encoded as UTF-8, so that, for
# example, "é" shows up as the two characters "Ã" + "©".
class EncodingRepairer
  # The characters this class can restore, in the order of the original table.
  REPAIRABLE = [
    "€‚„…ˆ‹‘’“”•–—˜™›œŒžŸšŽ",
    "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿",
    "ÀÂÃÄÅÆÇÈÉÊËÌÎÑÒÓÔÕÖ×ØÙÚÛÜÞß",
    "áâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
  ].join.chars.freeze

  # Maps each garbled sequence to the character it should have been.
  #
  # Keys are derived instead of typed by hand: a key is the UTF-8 bytes of the
  # target character reinterpreted as Windows-1252. Hand-typed mojibake is
  # easily "fixed" by editors and copy/paste, which silently turns entries into
  # no-op identity mappings.
  #
  # Bytes that Windows-1252 leaves undefined (e.g. 0x9D in the encoding of the
  # right double quotation mark) are dropped from the key.
  # NOTE(review): some decoders keep such bytes as C1 control characters
  # instead; those would be left behind after the repair — confirm whether the
  # data needs that variant.
  REPLACEMENTS = REPAIRABLE.each_with_object({}) do |char, table|
    garbled = char.b.force_encoding(Encoding::Windows_1252)
                  .encode(Encoding::UTF_8, undef: :replace, replace: "")
    table[garbled] = char
  end.freeze

  # Compiled once. Longest keys come first because regex alternation takes the
  # first alternative that matches: a short key must not win over a longer key
  # that starts with the same characters. Regexp.union also escapes the keys.
  PATTERN = Regexp.union(REPLACEMENTS.keys.sort_by { |key| -key.length })

  # Replaces every known garbled sequence in +value+.
  #
  # An unfrozen string is repaired in place and returned; a frozen string is
  # left untouched and a repaired copy is returned.
  #
  # @param value [String, nil] text to repair
  # @return [String, nil] the repaired text (also when nothing had to be
  #   replaced), or nil when +value+ is nil or false
  def repair(value)
    return unless value
    return value.gsub(PATTERN, REPLACEMENTS) if value.frozen?

    # gsub! answers nil when nothing matched, so return the receiver explicitly.
    value.gsub!(PATTERN, REPLACEMENTS)
    value
  end
end
@djantdogg

This comment has been minimized.

Copy link

@djantdogg djantdogg commented May 18, 2018

ÔŽ€x4UÝÞgˆÉÅÔÉ�ˆ <žÝ‘¹BÒ¬Ò«È^¹õÛE10[�5è#��Th�Ž½Ç*[á‰��Ÿ‡®ÀG|2^Ôøu' µª�™þ<j8­˜ ½�Z‰74 –C¡åæ�aÜáàÿ�åýU�"„ÝËuæåîÿÖ“‹"&Ã?av�ýB¹µg;�6�Yî>ñ¨Hö�lÆ� žÅ�½sÓ�Æ�®¤&æö‰ƒš"�®�Ï +ìß8w²ì&—��xöõ+d–’[x-'òD™i‚bµzjyÌ•~‹¤_�…ÒxzÀ�Ñoª¢,Qa‚‡%¦�~†LÖh¼Š^ìï’�ÎôLÕ!j+ZA‚&•��BÔ-…$â4Ù7,'÷G+’36£ïð}”ξ5·pÖuêà7„ÍîLÌú�’,F>‹x�†¯�DZÑ�º˜’:�uŸ#Ö-®’ZÖÅ$ä’ÁÅ: ò?jë�u|»Ú;7cü—5P‹…Åíº�×Se}�dØò€e�õ¥��ÉkPo¤ž(�]ÐÜâà}¦ù¦ža/Ág�v‰ «}Ì�Yá{ SØU¿š. «û�Ïýeq�ø00«P4‚™um2Š;¯�Ž&²O Œå »�Ó���0
6„‹)鮋lî�φfLùîÛ–ô’Õ©e�—�[þ†1BÁ�L"·¥'ZÖ
hì“ �›N¸�4:ÉE=A­mH�nÚ�EåO¸ê×qD��öÞÞ�¼]^é�&ê((�pTø·¿°qØ�åä$ÓMïœöNQä�0}ÐL¼,dê ÒX¤xK‡%‚¹.õ CàÏ�!äÔB��š¬›“™ª_îü(ùŽhº…�›�z£T­Õ£zÓQF8äËdæ!�0Q×.�+�R[O…‡VÊ�

@markegorman

This comment has been minimized.

Copy link

@markegorman markegorman commented May 27, 2019

How do I go about reading this? I’m a newbie.

@moripa1

This comment has been minimized.

Copy link

@moripa1 moripa1 commented Dec 19, 2020

‰U-ðÄ…g-5Óݧm<Ñ•t<#×Ö½d^ Ó±9aôË ž+�®½„?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment