Skip to content

Instantly share code, notes, and snippets.

@jgeboski
Last active August 29, 2015 14:26
Show Gist options
  • Save jgeboski/b10a8692dbf4e54a4596 to your computer and use it in GitHub Desktop.
Save jgeboski/b10a8692dbf4e54a4596 to your computer and use it in GitHub Desktop.
FreshRSS plugin which strips intrusive HTML elements
<?php
/*
* Copyright 2015 James Geboski <jgeboski@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Installing:
* 1. Move extension.php and metadata.json to <freshrss>/extensions/stripper
* 2. Enable via the Extensions interface
*/
/**
 * FreshRSS extension that removes the elements named in self::strip from
 * an entry's HTML before it is displayed, then prunes the neighbouring and
 * enclosing elements that the removal left empty.
 */
class StripperExtension extends Minz_Extension
{
    /** Lower-case tag names of the elements removed from every entry. */
    const strip = array('iframe', 'img');

    /**
     * Registers entryBeforeDisplay() for the 'entry_before_display' hook.
     */
    public function init()
    {
        $func = array($this, 'entryBeforeDisplay');
        $this->registerHook('entry_before_display', $func);
    }

    /**
     * Strips the configured elements from an entry's content.
     *
     * @param object $entry Object exposing content() and _content(); the
     *                      concrete class is declared outside this file.
     *
     * @return object The same entry. Its content is rewritten only when
     *                there was non-empty HTML that could be parsed.
     */
    public function entryBeforeDisplay($entry)
    {
        $data = $entry->content();

        // DOMDocument::loadHTML() rejects an empty source (ValueError on
        // PHP 8, warning + false before), so leave such entries untouched.
        if (!is_string($data) || trim($data) === '') {
            return $entry;
        }

        // Hand libxml pure ASCII by turning every non-ASCII code point
        // into a numeric entity. This replaces the 'HTML-ENTITIES'
        // pseudo-encoding of mb_convert_encoding(), deprecated in PHP 8.2.
        $data = mb_encode_numericentity(
            $data,
            array(0x80, 0x10FFFF, 0, 0x1FFFFF),
            'UTF-8'
        );

        // Collect parser diagnostics internally instead of silencing
        // loadHTML() with "@", then restore the caller's libxml setting.
        $dom      = new DOMDocument();
        $previous = libxml_use_internal_errors(true);
        $loaded   = $dom->loadHTML($data);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            // Unparseable markup: keep the original content rather than
            // overwriting it with an empty document.
            return $entry;
        }

        $xpath = new DOMXPath($dom);

        foreach (self::strip as $name) {
            self::strip($xpath, $name);
        }

        $entry->_content(self::fragmentHtml($dom));
        return $entry;
    }

    /**
     * Removes every element called $name from the document behind $xpath
     * and then deletes the adjacent/enclosing elements left empty.
     *
     * @param DOMXPath $xpath XPath evaluator bound to the document.
     * @param string   $name  Tag name to remove.
     *
     * @return void
     */
    private static function strip($xpath, $name)
    {
        $nodes = $xpath->query('//' . $name);

        if ($nodes === false) {
            return;
        }

        $directions = array('nextSibling', 'previousSibling', 'parentNode');

        foreach ($nodes as $node) {
            if ($node->parentNode === null) {
                // Already detached from the tree; nothing to remove.
                continue;
            }

            // Look up the neighbours while the node is still attached.
            $neighbours = array();

            foreach ($directions as $op) {
                $neighbours[$op] = self::getElement($node, $op);
            }

            $node->parentNode->removeChild($node);

            // Walk outwards in each direction, deleting elements for as
            // long as they are empty.
            foreach ($neighbours as $op => $n) {
                while (
                    ($n !== null)
                    && ($n->parentNode !== null)
                    && self::isElementEmpty($n)
                ) {
                    $next = self::getElement($n, $op);
                    $n->parentNode->removeChild($n);
                    $n = $next;
                }
            }
        }
    }

    /**
     * Tells whether $node is an element with no visible content: no text
     * besides whitespace (including non-breaking spaces) and no non-empty
     * descendant element.
     *
     * Elements listed in self::strip are never reported as empty, so an
     * ancestor holding one that is still awaiting removal is kept.
     *
     * @param DOMNode $node Node to inspect.
     *
     * @return bool
     */
    private static function isElementEmpty($node)
    {
        if ($node->nodeType !== XML_ELEMENT_NODE) {
            return false;
        }

        $name = strtolower($node->nodeName);

        if (in_array($name, self::strip, true)) {
            return false;
        }

        foreach ($node->childNodes as $c) {
            // Non-element children are judged via nodeValue below. The
            // original tested $node here instead of $c, so any text child
            // made the element count as non-empty.
            if ($c->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            if (!self::isElementEmpty($c)) {
                return false;
            }
        }

        $val = html_entity_decode($node->nodeValue);

        // U+00A0 is listed explicitly so "&nbsp;"-only wrappers count as
        // empty; a failed match (e.g. invalid UTF-8) yields "not empty".
        return preg_match('/^[\s\x{00A0}]*$/u', $val) === 1;
    }

    /**
     * Follows the $op link from $node until an element node is found.
     *
     * @param DOMNode $node Starting node (not itself considered).
     * @param string  $op   Property to follow: 'nextSibling',
     *                      'previousSibling' or 'parentNode'.
     *
     * @return DOMNode|null The first element reached, or null if none.
     */
    private static function getElement($node, $op)
    {
        for ($n = $node->$op; $n !== null; $n = $n->$op) {
            if ($n->nodeType === XML_ELEMENT_NODE) {
                return $n;
            }
        }

        return null;
    }

    /**
     * Serialises the parsed content as a fragment: the children of each
     * element directly under the root, without the DOCTYPE and root
     * wrappers a bare saveHTML() would emit.
     *
     * @param DOMDocument $dom Document to serialise.
     *
     * @return string Markup, or '' when the document has no root element.
     */
    private static function fragmentHtml($dom)
    {
        $root = $dom->documentElement;

        if ($root === null) {
            return '';
        }

        $html = '';

        foreach ($root->childNodes as $section) {
            if ($section->nodeType !== XML_ELEMENT_NODE) {
                // Stray comment/text directly under the root.
                $html .= $dom->saveHTML($section);
                continue;
            }

            foreach ($section->childNodes as $child) {
                $html .= $dom->saveHTML($child);
            }
        }

        return $html;
    }
}
?>
{
"name": "Stripper",
"author": "James Geboski",
"description": "Strip intrusive HTML elements (iframes, images, etc).",
"version": 0.1,
"entrypoint": "Stripper",
"type": "user"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment