Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Fixes MS Word data
/**
* fixMSWord
*
* Convert the non-ASCII bytes of a single-byte (Windows-1252 / "MS Word")
* string into HTML numeric character references, so that smart quotes,
* dashes, the euro sign, accented letters etc. survive on any HTML page.
*
* - Bytes 0x00-0x7E are returned untouched (note: '&', '<' and '>' are NOT
*   escaped, so this is not a substitute for htmlspecialchars()).
* - Byte 0x7F and bytes 0xA0-0xFF become '&#N;' where N is the byte value
*   (identical to the Unicode code point for Latin-1).
* - Bytes 0x80-0x9F are translated through the Windows-1252 table to their
*   real Unicode code points (e.g. 0x93 => '&#8220;'). References such as
*   '&#147;' point at C1 control characters and are invalid in HTML.
* - The five bytes Windows-1252 leaves undefined (0x81, 0x8D, 0x8F, 0x90,
*   0x9D) have no better mapping and are emitted as '&#N;' with N = byte value.
*
* WARNING: the input must be single-byte encoded. Every byte >= 0x7F is
* replaced individually, so a UTF-8 multi-byte sequence would be split into
* one reference per byte.
*
* Updated 7-15-2015 by Jay Wood to encode lower end items into HTML entity counterparts.
*
* @param string|array $string Windows-1252 encoded text (or an array of such
*                             strings, which str_replace() handles element-wise).
* @return string|array The text with every byte >= 0x7F replaced by an HTML
*                      numeric character reference.
*
* @author Robin Cafolla,Jay Wood
* @date 2013-03-22
* @Copyright (c) 2013 Robin Cafolla
* @licence MIT (x11) http://opensource.org/licenses/MIT
*/
function fixMSWord( $string ) {
    // The lookup tables never change, so build them once per request.
    static $search  = null;
    static $replace = null;

    if ( $search === null ) {
        // Windows-1252 byte => Unicode code point for the 0x80-0x9F block.
        // Bytes missing from this list are undefined in Windows-1252.
        $cp1252 = array(
            128 => 8364, // euro sign
            130 => 8218, // single low-9 quotation mark
            131 => 402,  // latin small f with hook
            132 => 8222, // double low-9 quotation mark
            133 => 8230, // horizontal ellipsis
            134 => 8224, // dagger
            135 => 8225, // double dagger
            136 => 710,  // modifier letter circumflex accent
            137 => 8240, // per mille sign
            138 => 352,  // latin capital S with caron
            139 => 8249, // single left-pointing angle quotation mark
            140 => 338,  // latin capital ligature OE
            142 => 381,  // latin capital Z with caron
            145 => 8216, // left single quotation mark
            146 => 8217, // right single quotation mark
            147 => 8220, // left double quotation mark
            148 => 8221, // right double quotation mark
            149 => 8226, // bullet
            150 => 8211, // en dash
            151 => 8212, // em dash
            152 => 732,  // small tilde
            153 => 8482, // trade mark sign
            154 => 353,  // latin small s with caron
            155 => 8250, // single right-pointing angle quotation mark
            156 => 339,  // latin small ligature oe
            158 => 382,  // latin small z with caron
            159 => 376,  // latin capital Y with diaeresis
        );

        $search  = array();
        $replace = array();

        // Printable ASCII maps to itself, so only 0x7F-0xFF need an entry.
        for ( $byte = 127; $byte <= 255; $byte++ ) {
            $codePoint = isset( $cp1252[ $byte ] ) ? $cp1252[ $byte ] : $byte;

            $search[]  = chr( $byte );
            $replace[] = '&#' . $codePoint . ';';
        }
    }

    // str_replace() applies the pairs one after another, which is safe here:
    // every replacement consists solely of '&', '#', digits and ';', none of
    // which appear in $search, so no output is ever replaced a second time.
    return str_replace( $search, $replace, $string );
}
@JayWood

This comment has been minimized.

Copy link
Owner Author

commented Jul 15, 2015

Updated to use HTML entities on lower end characters.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.