Skip to content

Instantly share code, notes, and snippets.

@JayWood
Last active August 29, 2015 14:25
Show Gist options
  • Save JayWood/d74edc7618569f7c4be7 to your computer and use it in GitHub Desktop.
Save JayWood/d74edc7618569f7c4be7 to your computer and use it in GitHub Desktop.
Fixes MS Word data
/**
* fixMSWord
*
* Converts the non-ASCII bytes of a single-byte "MS Word" (Windows-1252) string
* into HTML numeric character references, leaving plain ASCII untouched.
*
* - Bytes 0-126 are returned unchanged.
* - Byte 127 (DEL) and bytes 160-255 become "&#N;" where N is the byte value
*   (for 160-255 the byte value equals the Unicode code point).
* - Bytes 128-159 that Windows-1252 defines (curly quotes, dashes, ellipsis,
*   euro sign, ...) become "&#N;" where N is the matching Unicode code point,
*   e.g. 0x93 => "&#8220;". A reference such as "&#147;" would point at a C1
*   control character instead and is not valid HTML.
* - Bytes 129, 141, 143, 144 and 157 have no Windows-1252 assignment and are
*   emitted as "&#N;" with their own byte value.
*
* The replacement is byte-wise: the input must be single-byte encoded text.
* A string that is already UTF-8 will have each byte of its multi-byte
* sequences converted separately, which corrupts it.
*
* Updated 7-15-2015 by Jay Wood to encode lower end items into HTML entity counterparts.
*
* @author Robin Cafolla,Jay Wood
* @date 2013-03-22
* @Copyright (c) 2013 Robin Cafolla
* @licence MIT (x11) http://opensource.org/licenses/MIT
*
* @param string|string[] $string Windows-1252 encoded text (or an array of such strings).
* @return string|string[] The text with every byte >= 127 replaced by a numeric character reference.
*/
function fixMSWord( $string ) {
    // Windows-1252 byte value => Unicode code point, for the 0x80-0x9F range
    // where the two differ. Unassigned bytes are deliberately absent so they
    // fall back to their own value below.
    $cp1252ToUnicode = array(
        128 => 8364, // euro sign
        130 => 8218, // single low-9 quotation mark
        131 => 402,  // latin small f with hook
        132 => 8222, // double low-9 quotation mark
        133 => 8230, // horizontal ellipsis
        134 => 8224, // dagger
        135 => 8225, // double dagger
        136 => 710,  // modifier letter circumflex accent
        137 => 8240, // per mille sign
        138 => 352,  // latin capital S with caron
        139 => 8249, // single left-pointing angle quotation mark
        140 => 338,  // latin capital ligature OE
        142 => 381,  // latin capital Z with caron
        145 => 8216, // left single quotation mark
        146 => 8217, // right single quotation mark
        147 => 8220, // left double quotation mark
        148 => 8221, // right double quotation mark
        149 => 8226, // bullet
        150 => 8211, // en dash
        151 => 8212, // em dash
        152 => 732,  // small tilde
        153 => 8482, // trade mark sign
        154 => 353,  // latin small s with caron
        155 => 8250, // single right-pointing angle quotation mark
        156 => 339,  // latin small ligature oe
        158 => 382,  // latin small z with caron
        159 => 376,  // latin capital Y with diaeresis
    );

    $search  = array();
    $replace = array();

    // Only bytes 127-255 need rewriting; printable ASCII maps to itself, so
    // listing it would just make str_replace() rescan the input for nothing.
    for ( $byte = 127; $byte <= 255; $byte++ ) {
        $codePoint = isset( $cp1252ToUnicode[ $byte ] ) ? $cp1252ToUnicode[ $byte ] : $byte;

        $search[]  = chr( $byte );
        $replace[] = '&#' . $codePoint . ';';
    }

    // str_replace() applies the pairs one after another, which is safe here:
    // every replacement consists solely of ASCII ('&', '#', digits, ';') and
    // therefore can never match a later search byte.
    return str_replace( $search, $replace, $string );
}
@JayWood
Copy link
Author

JayWood commented Jul 15, 2015

Updated to use HTML entities on lower end characters.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment