Last active
May 30, 2016 16:38
-
-
Save b-alidra/64b9ac51641c47e5be718f89b4df1e76 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Outlook wraps the quoted message in a <div> whose first child is an
// <hr id="stopSpelling">. Example of an Outlook quote:
//
// <div>
//   <hr id="stopSpelling">
//   Date: Fri. 20 May 2016 17:40:24 +0200<br>
//   Subject: Votre facture Selon devis DEV201605201<br>
//   From: xxxxxx@microfactures.com<br>
//   To: xxxxxx@hotmail.fr<br>
//   Lorem ipsum dolor sit amet consectetur...
// </div>
//
// The idea is to delete #stopSpelling's parent, which takes the whole quoted
// block with it.
$hr = $dom->find('#stopSpelling', /* nth result */ 0);

// Nothing to do when the marker is absent (the lookup then yields null).
if (null !== $hr) {
    $hr->getParent()->delete();
}
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Roundcube adds a paragraph with a sentence like this one, just
// before the quote:
//   "Le 21-05-2016 02:25, AB Prog - Belkacem Alidra a écrit :"
// Let's remove it.
//
// The sentence may reach us either entity-encoded ("&eacute;crit&nbsp;:") or
// already decoded as UTF-8 ("écrit" followed by a plain or non-breaking
// space), so the pattern accepts both. The accented character and the
// non-breaking space are written as byte escapes so the pattern does not
// depend on the encoding of this source file.
$pattern = '/Le [0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}, [^:]+ a '
    . '(?:&eacute;|\xC3\xA9)crit(?:&nbsp;|\xC2\xA0|\s)+:/';

// Walk the array returned by toArray() and drop every matching paragraph.
foreach ($dom->find('p')->toArray() as $paragraph) {
    if (1 === preg_match($pattern, $paragraph->text())) {
        $paragraph->delete();
    }
}
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Hand back the HTML of whatever is left under the document root.
return $dom->root->innerHtml();
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Remove the quoted parts from the message body.
 *
 * Quote markers that are handled:
 *  - Standard <blockquote>...</blockquote>
 *  - Thunderbird (div.moz-cite-prefix)
 *  - Gmail (div.gmail_extra, div.gmail_quote)
 *  - Yahoo (div.yahoo_quoted)
 *  - Outlook (the parent of <hr id="stopSpelling">)
 *  - Roundcube (the "Le dd-mm-yyyy hh:mm, ... a écrit :" paragraph)
 *
 * NOTE(review): OSX Mail Client has no dedicated rule below; it is presumably
 * covered by the generic <blockquote> rule - to be confirmed.
 *
 * Once the quotes are gone, <p>, <span>, <b>, <strong> and <div> elements
 * that contain nothing but whitespace / non-breaking spaces are removed too.
 *
 * @param string $message_body
 *   The incoming or outgoing HTML message to be cleaned
 *
 * @return string
 *   The cleaned message
 */
public static function strip_quotes_from_message($message_body)
{
    $els_to_remove = [
        'blockquote',                         // Standard quote block tag
        'div.moz-cite-prefix',                // Thunderbird
        'div.gmail_extra', 'div.gmail_quote', // Gmail
        'div.yahoo_quoted',                   // Yahoo
    ];

    $dom = new PHPHtmlParser\Dom();
    $dom->load($message_body);

    foreach ($els_to_remove as $selector) {
        foreach ($dom->find($selector)->toArray() as $node) {
            $node->delete();
        }
    }

    // Outlook doesn't respect
    // http://www.w3.org/TR/1998/NOTE-HTMLThreading-0105#Appendix%20B
    // We need to detect quoted replies "by hand".
    //
    // Example of Outlook quote:
    //
    // <div>
    //   <hr id="stopSpelling">
    //   Date: Fri. 20 May 2016 17:40:24 +0200<br>
    //   Subject: Votre facture Selon devis DEV201605201<br>
    //   From: xxxxxx@microfactures.com<br>
    //   To: xxxxxx@hotmail.fr<br>
    //   Lorem ipsum dolor sit amet consectetur adipiscing...
    // </div>
    //
    // The idea is to delete #stopSpelling's parent...
    $hr = $dom->find('#stopSpelling', /* nth result */ 0);
    if (null !== $hr) {
        $hr->getParent()->delete();
    }

    // Roundcube adds a <p> with a sentence like this one, just
    // before the quote:
    //   "Le 21-05-2016 02:25, AB Prog - Belkacem Alidra a écrit :"
    // Let's remove it. The sentence may be entity-encoded
    // ("&eacute;crit&nbsp;:") or already decoded as UTF-8, so both spellings
    // are accepted; byte escapes keep the pattern independent of the encoding
    // of this source file.
    $pattern = '/Le [0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}, [^:]+ a '
        . '(?:&eacute;|\xC3\xA9)crit(?:&nbsp;|\xC2\xA0|\s)+:/';
    foreach ($dom->find('p')->toArray() as $paragraph) {
        if (1 === preg_match($pattern, $paragraph->text())) {
            $paragraph->delete();
        }
    }

    // Let's remove empty tags like <p> </p> or <p>&nbsp;</p>...
    foreach ($dom->find('p,span,b,strong,div')->toArray() as $element) {
        // Non-breaking spaces (entity or raw UTF-8 bytes) count as whitespace.
        $html = trim(str_replace(['&nbsp;', "\xC2\xA0"], ' ', $element->innerHtml()));

        // Strict comparison on purpose: empty() would also discard an element
        // whose whole content is the string "0".
        if ('' === $html) {
            $element->delete();
        }
    }

    return $dom->root->innerHtml();
}
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php function strip_quotes_from_message($message_body) ?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment