Skip to content

Instantly share code, notes, and snippets.

@gregrickaby
Forked from JayWood/fix_msword.php
Last active August 29, 2015 14:25
Show Gist options
  • Save gregrickaby/c9ff4e6c2e4d41ec8bf7 to your computer and use it in GitHub Desktop.
Save gregrickaby/c9ff4e6c2e4d41ec8bf7 to your computer and use it in GitHub Desktop.
/**
 * fixMSWord
 *
 * Converts the high bytes of a single-byte (Windows-1252 / ISO-8859-1) string,
 * such as text pasted from MS Word, into numeric HTML entities.
 *
 * - Bytes 0-126 are returned untouched.
 * - Byte 127 becomes "&#127;".
 * - Bytes 128-159 that Windows-1252 defines (curly quotes, dashes, ellipsis,
 *   euro sign, ...) become the entity for their real Unicode code point,
 *   e.g. 0x93 => "&#8220;". References such as "&#147;" point into the C1
 *   control range and are not valid HTML character references.
 * - Bytes 129, 141, 143, 144 and 157 have no Windows-1252 meaning and are
 *   emitted as "&#<byte>;".
 * - Bytes 160-255 match Unicode one-to-one and become "&#<byte>;".
 *
 * The input is processed byte by byte. A string that is already UTF-8 encoded
 * will have each byte of its multi-byte sequences turned into a separate
 * entity, so only pass single-byte encoded text.
 *
 * Updated 7-15-2015 by Jay Wood to encode lower end items into HTML entity counterparts.
 *
 * @author Robin Cafolla,Jay Wood
 * @date 2013-03-22
 * @Copyright (c) 2013 Robin Cafolla
 * @licence MIT (x11) http://opensource.org/licenses/MIT
 *
 * @param string|array $string Single-byte encoded text. An array of strings
 *                             is also accepted, because it is handed
 *                             straight to str_replace().
 * @return string|array The input with every byte >= 127 replaced by a numeric
 *                      HTML entity.
 */
function fixMSWord( $string ) {
	// Windows-1252 byte => Unicode code point, for the 0x80-0x9F range where
	// the two differ. Bytes missing from this table are undefined in
	// Windows-1252 and fall back to their own byte value below.
	$cp1252 = array(
		128 => 8364, // euro sign
		130 => 8218, // single low-9 quotation mark
		131 => 402,  // latin small letter f with hook
		132 => 8222, // double low-9 quotation mark
		133 => 8230, // horizontal ellipsis
		134 => 8224, // dagger
		135 => 8225, // double dagger
		136 => 710,  // modifier letter circumflex accent
		137 => 8240, // per mille sign
		138 => 352,  // latin capital letter S with caron
		139 => 8249, // single left-pointing angle quotation mark
		140 => 338,  // latin capital ligature OE
		142 => 381,  // latin capital letter Z with caron
		145 => 8216, // left single quotation mark
		146 => 8217, // right single quotation mark
		147 => 8220, // left double quotation mark
		148 => 8221, // right double quotation mark
		149 => 8226, // bullet
		150 => 8211, // en dash
		151 => 8212, // em dash
		152 => 732,  // small tilde
		153 => 8482, // trade mark sign
		154 => 353,  // latin small letter s with caron
		155 => 8250, // single right-pointing angle quotation mark
		156 => 339,  // latin small ligature oe
		158 => 382,  // latin small letter z with caron
		159 => 376,  // latin capital letter Y with diaeresis
	);

	$search  = array();
	$replace = array();

	// Only bytes 127-255 need rewriting; lower bytes map to themselves.
	for ( $byte = 127; $byte <= 255; $byte++ ) {
		$codePoint = isset( $cp1252[ $byte ] ) ? $cp1252[ $byte ] : $byte;
		$search[]  = chr( $byte );
		$replace[] = '&#' . $codePoint . ';';
	}

	// Every replacement is pure ASCII and every search byte is >= 127, so the
	// sequential passes of str_replace() can never re-match earlier output.
	return str_replace( $search, $replace, $string );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment