Skip to content

Instantly share code, notes, and snippets.

View strip_word_html.php
<?php
function strip_word_html($text, $allowed_tags = '<b><i><sup><sub><em><strong><u><br>')
{
mb_regex_encoding('UTF-8');
//replace MS special characters first
$search = array('/&lsquo;/u', '/&rsquo;/u', '/&ldquo;/u', '/&rdquo;/u', '/&mdash;/u');
$replace = array('\'', '\'', '"', '"', '-');
$text = preg_replace($search, $replace, $text);
//make sure _all_ html entities are converted to the plain ascii equivalents - it appears
//in some MS headers, some html entities are encoded and some aren't