Skip to content

Instantly share code, notes, and snippets.

@tpokorra
Last active December 18, 2015 02:38
Show Gist options
  • Save tpokorra/5711916 to your computer and use it in GitHub Desktop.
Save tpokorra/5711916 to your computer and use it in GitHub Desktop.
This script creates an RSS feed from TYPO3 pages without touching the TYPO3 installation.
<?php
/**
 * Cut the text between two markers out of a buffer and advance the buffer.
 *
 * The first occurrence of $startContentStr is located in $contents and the
 * text up to the matching $endContentStr is returned. When the start marker
 * is an opening tag (begins with "<") and the end marker is a closing tag
 * (begins with "</"), nested elements of the same name are balanced so the
 * closing tag that belongs to the opening tag is used.
 *
 * Either marker may be the special string "^": as start marker it means
 * "beginning of the buffer", as end marker "end of the buffer".
 *
 * Side effect: $contents is replaced by the remainder of the buffer,
 * beginning at the end marker (the end marker itself is NOT consumed).
 * If the start marker is not found, $contents is emptied and '' is returned.
 * If no (balanced) end marker is found, everything up to the end of the
 * buffer is returned and $contents becomes ''.
 *
 * @param string $contents        buffer to search; modified by reference
 * @param string $startContentStr start marker, or "^"
 * @param string $endContentStr   end marker, or "^"
 * @return string the text between the two markers
 */
function getContent(&$contents, $startContentStr, $endContentStr)
{
    // A failed download may hand us FALSE/NULL; treat it as an empty buffer.
    $contents = (string) $contents;

    // If we want the content of an html element we need to count nested
    // elements of the same name, so work out the tag name first.
    $element = '';
    if ($startContentStr !== ''
        && $startContentStr[0] == '<'
        && substr($endContentStr, 0, 2) == '</'
    ) {
        // The tag name runs from just after "<" up to the first space or ">",
        // whichever comes first (or to the end of the marker if neither exists).
        $nameLength = strcspn($startContentStr, ' >', 1);
        $element = trim((string) substr($startContentStr, 1, $nameLength));
    }

    if ($startContentStr == '^') {
        $startContentStr = '';
        $contentPos = 0;
    } else {
        $contentPos = strpos($contents, $startContentStr);
        if ($contentPos === false) {
            $contents = '';
            return '';
        }
    }

    $start = $contentPos + strlen($startContentStr);
    $bufferLength = strlen($contents);

    if ($endContentStr == '^') {
        $end = $bufferLength;
    } else {
        $end = strpos($contents, $endContentStr, $start);

        if ($element !== '') {
            // Make sure we find the right closing tag: keep moving to the next
            // end marker until opening and closing tags inside the candidate
            // text are balanced.
            $openTag = '<' . $element;
            $closeTag = '</' . $element . '>';
            while ($end !== false) {
                $candidate = substr($contents, $start, $end - $start);
                if (substr_count($candidate, $openTag) == substr_count($candidate, $closeTag)) {
                    break;
                }
                $end = strpos($contents, $endContentStr, $end + 1);
            }
        }

        if ($end === false) {
            // End marker missing (or tags never balanced): take the rest.
            $end = $bufferLength;
        }
    }

    $result = (string) substr($contents, $start, $end - $start);
    $contents = (string) substr($contents, $end);
    return $result;
}
/**
 * Download a URL with cURL and return the response body.
 *
 * Used instead of file_get_contents(), which (per the original author's
 * observation) did not seem to return the full page.
 *
 * Any "&amp;" in the URL is turned back into "&" before the request, and a
 * browser-like User-Agent string is sent.
 *
 * NOTE(review): both timeout options are set to 0, presumably to avoid
 * cutting off slow responses; confirm the exact meaning of 0 against the
 * cURL documentation.
 *
 * @param string $URL address to fetch
 * @return string|false the body, or FALSE when the transfer failed or the
 *                      body is falsy (e.g. empty)
 */
function curl_get_file_contents($URL)
{
    $userAgent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 1.1.4322)';
    $requestUrl = str_replace('&amp;', '&', $URL);

    $handle = curl_init();
    curl_setopt($handle, CURLOPT_URL, $requestUrl);
    curl_setopt($handle, CURLOPT_USERAGENT, $userAgent);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 0);
    curl_setopt($handle, CURLOPT_TIMEOUT, 0);

    $body = curl_exec($handle);
    curl_close($handle);

    return $body ? $body : FALSE;
}
/**
 * Fetch one TYPO3 page and print it as an RSS <item> element.
 *
 * The page title is taken from the <title> element and the article body
 * from <div id="content">. HTML comments, the mail-decryption javascript
 * calls and all class/style/id attributes are stripped from the body, which
 * is then emitted as escaped HTML inside <description>.
 *
 * Every value written into the feed is XML-escaped so that characters such
 * as "&" or named HTML entities in the page cannot make the feed malformed.
 * If the page cannot be downloaded, no item is printed at all.
 *
 * NOTE(review): the escaping assumes the page is UTF-8 encoded (the feed
 * declares utf-8); invalid byte sequences are replaced rather than dropped.
 *
 * @param int    $date_published Unix timestamp used for <pubDate>
 * @param string $url            address of the page; also used as link and guid
 * @return void
 */
function GetArticleFromTypo3($date_published, $url)
{
    $html = curl_get_file_contents($url);
    if ($html === false) {
        // Download failed: skip the article instead of emitting an empty item.
        return;
    }

    // getContent() consumes $html as it goes, so the title must be read first.
    $title = getContent($html, '<title>', '</title>');
    $content = getContent($html, '<div id="content">', '</div>');

    // "s" lets "." span newlines so multi-line comments are removed as well.
    $content = preg_replace('%<!--.*?-->%s', '', $content);
    $content = preg_replace('%( javascript:linkTo_UnCryptMailto\().*?(\);)%i', '', $content);
    $content = preg_replace('%( class=").*?(")%i', '', $content);
    $content = preg_replace('%( style=").*?(")%i', '', $content);
    $content = preg_replace('%( id=").*?(")%i', '', $content);
    $content = str_replace('&nbsp;', ' ', $content);

    // The title is HTML text: decode its entities, then escape for XML.
    $xmlTitle = htmlspecialchars(
        html_entity_decode(trim($title), ENT_QUOTES, 'UTF-8'),
        ENT_NOQUOTES | ENT_SUBSTITUTE,
        'UTF-8'
    );
    // double_encode = false keeps a URL that already contains "&amp;" intact.
    $xmlUrl = htmlspecialchars($url, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    // The description carries HTML markup as escaped text, so "&", "<" and ">"
    // must all be escaped (this also neutralises named HTML entities).
    $xmlContent = htmlspecialchars((string) $content, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
?>
<item>
<title><?php echo $xmlTitle; ?></title>
<link><?php echo $xmlUrl;?></link>
<pubDate><?php echo date(DATE_RSS, $date_published); ?></pubDate>
<dc:creator>Timotheus Pokorra (TBits)</dc:creator>
<guid isPermaLink="false"><?php echo $xmlUrl;?></guid>
<description><?php echo $xmlContent; ?></description>
</item>
<?php
}
/**
 * Send the XML content type and print the opening part of the RSS document:
 * the XML declaration, the <rss> and <channel> start tags and the channel
 * metadata (title, link, description, language).
 *
 * @return void
 */
function PrintRSSHeader()
{
    header('Content-Type: text/xml');

    $preamble = array(
        // Closing "?" and ">" are kept apart so no tool mistakes them for a PHP close tag.
        '<?xml version="1.0" encoding="utf-8" ?' . '>',
        '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">',
        '<channel>',
        '<title>TBits.net Kolab News</title>',
        '<link>http://www.tbits.net</link>',
        '<description>TBits.net works with the Kolab project and provides patches and extensions to Kolab on this RSS Feed</description>',
        '<language>en</language>',
    );

    // Every line, including the last one, is terminated by a newline.
    echo implode("\n", $preamble) . "\n";
}
/**
 * Print the closing </channel> and </rss> tags that end the feed.
 *
 * @return void
 */
function PrintRSSFooter()
{
    echo "</channel>\n", "</rss>\n";
}
// Build the feed: header, one <item> per listed article (in this order), footer.
// Each entry pairs the publication timestamp with the page address;
// mktime() arguments are hour, minute, second, month, day, year.
$feedArticles = array(
    array(mktime(1, 0, 0, 6, 4, 2013), "http://www.tbits.net/tbits-opensource/install-nightly-build-from-gitkolaborg-master.html"),
    array(mktime(1, 0, 0, 3, 27, 2013), "http://www.tbits.net/tbits-opensource/kolab3multipledomains.html"),
);

PrintRSSHeader();
foreach ($feedArticles as $feedArticle) {
    GetArticleFromTypo3($feedArticle[0], $feedArticle[1]);
}
PrintRSSFooter();
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment