Skip to content

Instantly share code, notes, and snippets.

@b-alidra
Last active May 30, 2016 16:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save b-alidra/64b9ac51641c47e5be718f89b4df1e76 to your computer and use it in GitHub Desktop.
Save b-alidra/64b9ac51641c47e5be718f89b4df1e76 to your computer and use it in GitHub Desktop.
<?php
// Selectors for the wrappers that mail clients put around quoted replies.
$els_to_remove = [
    // Standard quote block tag
    'blockquote',
    // Thunderbird
    'div.moz-cite-prefix',
    // Gmail
    'div.gmail_extra',
    'div.gmail_quote',
    // Yahoo
    'div.yahoo_quoted',
];
?>
<?php
// Parse the message body, then delete every element matched by one of the
// selectors listed in $els_to_remove.
$dom = new PHPHtmlParser\Dom();
$dom->load($message_body);
foreach ($els_to_remove as $selector) {
    // toArray() gives a plain snapshot of the matches, so deleting nodes
    // does not disturb the iteration.
    foreach ($dom->find($selector)->toArray() as $quoted) {
        $quoted->delete();
    }
}
?>
<?php
// Remove elements that have no content: an inner HTML that is empty,
// whitespace-only, or a lone "&nbsp;".
$els = $dom->find('p,span,b,strong,div')->toArray();
foreach ($els as $e) {
    $html = trim($e->innerHtml());
    // Strict comparison on purpose: empty() also treats the string "0" as
    // empty, which would delete an element whose only content is "0".
    if ($html === '' || $html === '&nbsp;') {
        $e->delete();
    }
}
?>
<?php
// Outlook quoted replies look like this:
//
// <div>
// <hr id="stopSpelling">
// Date: Fri. 20 May 2016 17:40:24 +0200
// Subject: Votre facture Selon devis DEV201605201
// From: xxxxxx@microfactures.com
// To: xxxxxx@hotmail.fr
// Lorem ipsum dolor sit amet consectetur...
// </div>
//
// The quote is removed by deleting the element that contains the
// #stopSpelling separator.
$separator = $dom->find('#stopSpelling', 0); // 0 => first match only
if ($separator !== null) {
    $quote_container = $separator->getParent();
    $quote_container->delete();
}
?>
<?php
// Roundcube adds a paragraph with a sentence like this one, just
// before the quote:
// "Le 21-05-2016 02:25, AB Prog - Belkacem Alidra a écrit :"
// Let's remove it.
//
// The entities are written once-encoded (&eacute;, &nbsp;), exactly like the
// pattern used in strip_quotes_from_message(); a double-encoded "&amp;eacute;"
// would only match text that was itself escaped twice.
// NOTE(review): assumes $p->text() returns entities undecoded -- confirm.
$pattern = '/Le [0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}, [^:]+ a &eacute;crit&nbsp;:/';
$ps = $dom->find('p')->toArray();
foreach ($ps as $p) {
    if (preg_match($pattern, $p->text()) === 1) {
        $p->delete();
    }
}
?>
<?php /* Hand back the inner HTML of the document root, i.e. what is left after the removals. */ return $dom->root->innerHtml(); ?>
<?php
/**
 * Remove the quoted parts from the message body.
 *
 * Quotes are recognised from the markup produced by each mail client:
 * - Standard <blockquote>...</blockquote>
 * - Google (div.gmail_extra, div.gmail_quote)
 * - Yahoo (div.yahoo_quoted)
 * - Thunderbird (div.moz-cite-prefix)
 * - Outlook (the element containing <hr id="stopSpelling">)
 * - Roundcube (the French "Le <date> <time>, <name> a écrit :" paragraph)
 * - OSX Mail Client (no dedicated rule here; presumably covered by the
 *   <blockquote> rule -- TODO confirm)
 *
 * Once the quotes are gone, p/span/b/strong/div elements whose content is
 * empty, whitespace-only or a lone "&nbsp;" are removed as well.
 *
 * @param String $message_body
 * The incoming or outgoing message to be cleaned
 *
 * @return String
 * The cleaned message
 */
public static function strip_quotes_from_message($message_body)
{
    $els_to_remove = [
        'blockquote',                         // Standard quote block tag
        'div.moz-cite-prefix',                // Thunderbird
        'div.gmail_extra', 'div.gmail_quote', // Gmail
        'div.yahoo_quoted',                   // Yahoo
    ];

    $dom = new PHPHtmlParser\Dom();
    $dom->load($message_body);

    foreach ($els_to_remove as $el) {
        // toArray() snapshots the matches so nodes can be deleted while
        // iterating.
        foreach ($dom->find($el)->toArray() as $found) {
            $found->delete();
        }
    }

    // Outlook doesn't respect
    // http://www.w3.org/TR/1998/NOTE-HTMLThreading-0105#Appendix%20B
    // We need to detect quoted replies "by hand"
    //
    // Example of Outlook quote:
    //
    // <div>
    // <hr id="stopSpelling">
    // Date: Fri. 20 May 2016 17:40:24 +0200<br>
    // Subject: Votre facture Selon devis DEV201605201<br>
    // From: xxxxxx@microfactures.com<br>
    // To: xxxxxx@hotmail.fr<br>
    // Lorem ipsum dolor sit amet consectetur adipiscing...
    // </div>
    //
    // The idea is to delete #stopSpelling's parent...
    $hr = $dom->find('#stopSpelling', /*nth result*/0);
    if (null !== $hr) {
        $hr->getParent()->delete();
    }

    // Roundcube adds a <p> with a sentence like this one, just
    // before the quote:
    // "Le 21-05-2016 02:25, AB Prog - Belkacem Alidra a écrit :"
    // Let's remove it
    $pattern = '/Le [0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}, [^:]+ a &eacute;crit&nbsp;:/';
    foreach ($dom->find('p')->toArray() as $p) {
        if (preg_match($pattern, $p->text()) === 1) {
            $p->delete();
        }
    }

    // Let's remove empty tags like <p> </p>...
    foreach ($dom->find('p,span,b,strong,div')->toArray() as $e) {
        $html = trim($e->innerHtml());
        // Strict comparison on purpose: empty() also treats the string "0"
        // as empty, which would delete an element whose only content is "0".
        if ($html === '' || $html === '&nbsp;') {
            $e->delete();
        }
    }

    return $dom->root->innerHtml();
}
?>
<?php function strip_quotes_from_message($message_body) ?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment