Strip quoted text from HTML emails in PHP. See https://b-alidra.com/strip-quoted-text-from-html-emails/
<?php
/**
 * Remove the quoted parts (previous messages of the thread) from a message body.
 *
 * The body is parsed as HTML and the following quote markers are removed:
 *   - Standard <blockquote>...</blockquote>
 *   - Thunderbird (div.moz-cite-prefix)
 *   - Gmail (div.gmail_extra, div.gmail_quote)
 *   - Yahoo (div.yahoo_quoted)
 *   - Outlook (the parent element of <hr id="stopSpelling">)
 *   - Roundcube (the "Le dd-mm-yyyy hh:mm, ... a écrit :" introduction paragraph)
 *   - OSX Mail Client (listed by the original author; presumably covered by
 *     the <blockquote> rule -- to be confirmed)
 *
 * Finally, elements left without any visible content (nothing but whitespace
 * and non-breaking spaces) are removed as well.
 *
 * Note: the given message is modified in place; the returned object is the
 * same instance that was passed in.
 *
 * @param Message $message
 *   The incoming or outgoing message to be cleaned
 *
 * @return Message
 *   The cleaned message
 */
public static function strip_quotes_from_message(Message $message)
{
    $quote_selectors = [
        'blockquote',                         // Standard quote block tag
        'div.moz-cite-prefix',                // Thunderbird
        'div.gmail_extra', 'div.gmail_quote', // Gmail
        'div.yahoo_quoted',                   // Yahoo
    ];

    $dom = new PHPHtmlParser\Dom();
    $dom->load($message->body);

    foreach ($quote_selectors as $selector) {
        // Work on an array snapshot: nodes are deleted while we iterate.
        foreach ($dom->find($selector)->toArray() as $quote) {
            $quote->delete();
        }
    }

    // Outlook doesn't respect
    // http://www.w3.org/TR/1998/NOTE-HTMLThreading-0105#Appendix%20B
    // We need to detect quoted replies "by hand"
    //
    // Example of Outlook quote:
    //
    // <div>
    //   <hr id="stopSpelling">
    //   Date: Fri. 20 May 2016 17:40:24 +0200<br>
    //   Subject: Votre facture Selon devis DEV201605201<br>
    //   From: xxxxxx@microfactures.com<br>
    //   To: xxxxxx@hotmail.fr<br>
    //   Lorem ipsum dolor sit amet consectetur adipiscing...
    // </div>
    //
    // The idea is to delete #stopSpelling's parent...
    $hr = $dom->find('#stopSpelling', /*nth result*/0);
    if (null !== $hr) {
        $hr_parent = $hr->getParent();
        // Guard against a marker that has no parent to remove.
        if (null !== $hr_parent) {
            $hr_parent->delete();
        }
    }

    // Roundcube adds a <p> with a sentence like this one, just
    // before the quote:
    // "Le 21-05-2016 02:25, AB Prog - Belkacem Alidra a écrit :"
    // Let's remove it
    $pattern = '/Le [0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}, [^:]+ a écrit :/';
    foreach ($dom->find('p')->toArray() as $paragraph) {
        if (preg_match($pattern, $paragraph->text()) === 1) {
            $paragraph->delete();
        }
    }

    // Let's remove empty tags like <p>&nbsp;</p>...
    // Non-breaking spaces (as the &nbsp; entity or as the raw UTF-8 character)
    // are stripped first because trim() only removes ASCII whitespace.
    // A strict comparison with '' is used instead of empty(), which would
    // also discard an element whose whole content is "0".
    $blank_tokens = ['&nbsp;', "\xC2\xA0"];
    foreach ($dom->find('p,span,b,strong,div')->toArray() as $element) {
        $content = trim(str_replace($blank_tokens, '', $element->innerHtml()));
        if ($content === '') {
            $element->delete();
        }
    }

    $message->body = $dom->root->innerHtml();

    return $message;
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment