Created
November 16, 2013 22:57
-
-
Save xeoncross/7506523 to your computer and use it in GitHub Desktop.
Try to convert HTML, Unicode, and other smart quotes to plain quotes. Fork this gist to add support for more types.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Replace typographic ("smart") quotes and dashes with plain ASCII punctuation.
 *
 * Three spellings of the same characters are recognised:
 *  - UTF-8 encoded characters (guillemets, U+2018..U+201F, U+2039/U+203A,
 *    en dash and em dash);
 *  - HTML entities, both named (&lsquo;) and decimal numeric (&#8216;);
 *  - raw Windows-1252 bytes 0x91-0x94, 0x96 and 0x97.
 *
 * Single quotes and single angle quotes become ', double quotes and
 * guillemets become ", an em dash becomes ' - ' and an en dash becomes '-'.
 *
 * The Windows-1252 bytes are only replaced when the input is NOT valid UTF-8.
 * In valid UTF-8 those byte values occur solely as continuation bytes of
 * multi-byte characters (for example the last byte of U+2014 is 0x94 and the
 * last byte of Cyrillic U+0451 is 0x91), so replacing them blindly would
 * corrupt the text.
 *
 * @param string $string Text to clean up, encoded as UTF-8 or Windows-1252.
 *
 * @return string The text with smart punctuation replaced by ASCII.
 */
function convert_smart_quotes($string)
{
    $map = [
        // UTF-8 encoded characters.
        "\xC2\xAB"     => '"',   // U+00AB left guillemet
        "\xC2\xBB"     => '"',   // U+00BB right guillemet
        "\xE2\x80\x98" => "'",   // U+2018 left single quote
        "\xE2\x80\x99" => "'",   // U+2019 right single quote
        "\xE2\x80\x9A" => "'",   // U+201A single low-9 quote
        "\xE2\x80\x9B" => "'",   // U+201B single high-reversed-9 quote
        "\xE2\x80\x9C" => '"',   // U+201C left double quote
        "\xE2\x80\x9D" => '"',   // U+201D right double quote
        "\xE2\x80\x9E" => '"',   // U+201E double low-9 quote
        "\xE2\x80\x9F" => '"',   // U+201F double high-reversed-9 quote
        "\xE2\x80\xB9" => "'",   // U+2039 single left angle quote
        "\xE2\x80\xBA" => "'",   // U+203A single right angle quote
        "\xE2\x80\x94" => ' - ', // U+2014 em dash
        "\xE2\x80\x93" => '-',   // U+2013 en dash

        // Named HTML entities.
        '&lsquo;' => "'",
        '&rsquo;' => "'",
        '&ldquo;' => '"',
        '&rdquo;' => '"',
        '&mdash;' => ' - ',
        '&ndash;' => '-',

        // Decimal numeric HTML entities.
        '&#8216;' => "'",
        '&#8217;' => "'",
        '&#8220;' => '"',
        '&#8221;' => '"',
        '&#8212;' => ' - ',
        '&#8211;' => '-',
    ];

    // An empty pattern with the "u" modifier matches only if the whole subject
    // is valid UTF-8; anything else is treated as legacy Windows-1252 text.
    if (preg_match('//u', $string) !== 1) {
        $map += [
            "\x91" => "'",   // left single quote
            "\x92" => "'",   // right single quote
            "\x93" => '"',   // left double quote
            "\x94" => '"',   // right double quote
            "\x96" => '-',   // en dash
            "\x97" => ' - ', // em dash
        ];
    }

    // A single pass is enough: strtr() tries the longest key first and never
    // rescans text it has already replaced, and no replacement value contains
    // any byte that could start or complete another key.
    return strtr($string, $map);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A small question: what about duplicating the special codes to use both the uppercase and the lowercase versions? Like "\xE2\x80\x98" and "\xe2\x80\x98". Also, single quotes around these strings seem to work better.