Skip to content

Instantly share code, notes, and snippets.

@singpolyma
Created November 20, 2008 23:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save singpolyma/27268 to your computer and use it in GitHub Desktop.
Save singpolyma/27268 to your computer and use it in GitHub Desktop.
<?php
require_once dirname(__FILE__).'/Outline.php';
require_once dirname(__FILE__).'/OutlineFromXML.php';
/**
 * Check whether a string parses as well-formed XML.
 *
 * @param string $data XML text to check
 * @return int 0 when the parser accepts $data (or when the only reported
 *             error is code 27), otherwise the parser's error code
 */
function checkXML($data) {
	$parser = xml_parser_create();
	$parsedOk = xml_parse_into_struct($parser, $data, $ignoredStruct);
	// The error code has to be read before the parser is released
	$code = $parsedOk ? XML_ERROR_NONE : xml_get_error_code($parser);
	xml_parser_free($parser);
	// Error 27 is deliberately tolerated.
	// NOTE(review): presumably "unbound namespace prefix" under expat -- confirm for libxml2 builds
	return ($code == 27) ? 0 : $code;
}//end function checkXML
/**
 * Fetch the body of a URL.
 *
 * Uses cURL when the extension is loaded and falls back to
 * file_get_contents() otherwise. Redirects are followed in both cases:
 * file_get_contents() does so by default for HTTP, so the cURL branch
 * enables CURLOPT_FOLLOWLOCATION to keep the two transports consistent.
 *
 * @param string $url URL to fetch
 * @return string|false the response body, or FALSE when the fetch fails
 */
function url_get($url) {
	if(!function_exists('curl_init'))
		return file_get_contents($url);
	$ch = curl_init($url);
	if($ch === FALSE) // handle could not be created; report failure like a failed fetch
		return FALSE;
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
	curl_setopt($ch, CURLOPT_MAXREDIRS, 20); // same ceiling as the HTTP stream wrapper's default max_redirects
	$result = curl_exec($ch);
	curl_close($ch);
	return $result;
}//end function url_get
/**
 * Parse feed text into a plain, format-independent array.
 *
 * The format is sniffed from the raw text, first match wins:
 * '<rss', '<rdf' + '<channel', '<feed', or '<CHANNEL' + '<ITEM'.
 * Anything else is treated as an (X)HTML page: elements whose rel contains
 * "alternate" are collected, the application/rss+xml one is preferred
 * (otherwise the first one found), its HREF is fetched with url_get() and
 * the result is parsed recursively.
 * NOTE(review): the HREF is passed to url_get() as-is (no base-URL
 * resolution) and the recursion has no depth limit.
 *
 * Feed-level keys of the result: title, link, description, language,
 * copyright, webMaster, dc:creator, timestamp (only when a date field was
 * present), category, image, items.
 * Per-item keys: title, link, description, dc:creator, author (removed for
 * "noemail@noemail.org" placeholders), category, comments, enclosure, guid,
 * timestamp, source, wfw:comment, wfw:commentRss, and media:content /
 * media:thumbnail when present.
 *
 * @param string $xmldata raw feed (or HTML page) text
 * @return array parsed feed; array('items' => array()) when nothing usable was found
 */
function std_feed_parse($xmldata) {
	// $inform records the detected format; it is not read again inside this function
	switch(true) {
		case (bool)stristr($xmldata,'<rss'):
			$struct = new OutlineFromXML($xmldata,array('rootel' => 'rss','itemel' => 'channel>item','subitemels' => false,'collapsels' => array('title','description')));
			$inform = 'rss20';
			break;
		case (bool)(stristr($xmldata,'<rdf') && stristr($xmldata,'<channel')):
			$struct = new OutlineFromXML($xmldata,array('rootel' => 'rdf:RDF','itemel' => 'item','subitemels' => false,'collapsels' => array('title','description')));
			$inform = 'rss10';
			break;
		case (bool)stristr($xmldata,'<feed'):
			$struct = new OutlineFromXML($xmldata,array('rootel' => 'feed','itemel' => 'entry','subitemels' => false,'collapsels' => array('title','content','summary')));
			$inform = 'atom';
			break;
		case (bool)(stristr($xmldata,'<CHANNEL') && stristr($xmldata,'<ITEM')):
			$struct = new OutlineFromXML($xmldata,array('rootel' => 'CHANNEL','itemel' => 'ITEM','subitemels' => false,'collapsels' => array('TITLE','ABSTRACT')));
			$inform = 'rss10';
			break;
/*		case (bool)(stristr($xmldata,'hentry')):
			$tidy = new tidy;
			$tidy->parseString($xmldata, array('output-xml' => true, 'doctype' => 'loose', 'add-xml-decl' => true),'utf8');
			$tidy->cleanRepair();
			$xml = str_replace('&nbsp;','&#160;',$tidy->value);
			$inform = 'hatom';
			break;*/
		default:
			// Not a recognised feed: massage the HTML towards XML and look for <link rel="alternate">
			$xmldata = preg_replace('/<!DOCTYPE[^>]*>/i','', $xmldata);
			// Self-close void elements. The match stops at the tag's own '>' so that
			// '<br>' and already self-closed tags do not swallow the markup after them.
			$xmldata = preg_replace('/<(meta|link|br|img)\b([^>]*?)\s*\/?>/i','<$1$2 />', $xmldata);
			$xmldata = str_replace('&nbsp;','&#160;',$xmldata);
			$vals = array();
			$theParser = xml_parser_create();
			xml_parse_into_struct($theParser,$xmldata,$vals); // attribute names come back upper-cased (parser default)
			xml_parser_free($theParser);
			$alternates = array();
			foreach($vals as $el) {
				// Most elements have no rel/href at all; skip them instead of reading missing keys
				if(!isset($el['attributes']['REL'], $el['attributes']['HREF'])) continue;
				if(!in_array('alternate', explode(' ', $el['attributes']['REL']))) continue;
				$type = isset($el['attributes']['TYPE']) ? $el['attributes']['TYPE'] : '';
				$alternates[$type] = $el['attributes']['HREF'];
			}//end foreach
			$url = !empty($alternates['application/rss+xml']) ? $alternates['application/rss+xml'] : array_shift($alternates);
			if($url) return std_feed_parse(url_get($url));
			return array('items' => array());
	}//end switch TRUE

	// Hoist the fields of a nested 'channel' up to the root so every format is read the same way
	if(is_a($struct->getField('channel'),'Outline')) {
		$channel = $struct->getField('channel');
		foreach($channel->getFields() as $name => $val)
			$struct->addField($name,$val);
		$struct->unsetField('channel');
	}//end if channel

	$data = array();

	// --- title ---
	$data['title'] = $struct->getField('title');
	if(!$data['title'])
		$data['title'] = $struct->getField('dc:title');
	if(!$data['title'])
		$data['title'] = $struct->getField('text');
	if(is_a($data['title'],'Outline')) {
		if($data['title']->getNumNodes()) {
			$tmp = $data['title']->getNode(0);
			$data['title'] = $tmp->getField('text');
		} else
			$data['title'] = '';
	}//end if is_a title Outline

	// --- link ---
	$data['link'] = $struct->getField('link');
	if(is_a($data['link'],'Outline')) {
		$tmp = $data['link'];
		$data['link'] = NULL; // cleared (not unset) so the checks below never read a missing key
		if($tmp->getField('rel') == 'alternate' || $tmp->getField('type') == 'text/html')
			$data['link'] = $tmp->getField('href');
		if(!$data['link']) {
			foreach($tmp->getNodes() as $node) {
				if($node->getField('rel') == 'alternate' || $node->getField('type') == 'text/html') {
					$data['link'] = $node->getField('href');
					break;
				}//end if rel || type
			}//end foreach nodes
		}//end if ! $data['link']
	}//end if link is_a Outline
	if(!$data['link'])
		$data['link'] = $struct->getField('id');
	if(!$data['link'])
		$data['link'] = $struct->getField('href');

	// --- description ---
	$data['description'] = $struct->getField('description');
	if(!$data['description'])
		$data['description'] = $struct->getField('dc:description');
	if(!$data['description'])
		$data['description'] = $struct->getField('subtitle');
	if(is_a($data['description'],'Outline'))
		$data['description'] = $data['description']->getField('text');
	if(!$data['description'])
		$data['description'] = $struct->getField('abstract');

	// --- simple fields, each with one fallback name ---
	$data['language'] = $struct->getField('language');
	if(!$data['language'])
		$data['language'] = $struct->getField('dc:language');
	$data['copyright'] = $struct->getField('copyright');
	if(!$data['copyright'])
		$data['copyright'] = $struct->getField('dc:rights');
	$data['webMaster'] = $struct->getField('webmaster');
	if(!$data['webMaster'])
		$data['webMaster'] = $struct->getField('managingeditor');
	$data['dc:creator'] = $struct->getField('dc:creator');
	if(!$data['dc:creator'])
		$data['dc:creator'] = $struct->getField('dc:contributor');

	// --- timestamp: first date field that yields a usable value; the key stays absent when none is present ---
	foreach(array('pubdate','lastbuilddate','dc:date','published','updated','modified') as $field) {
		// -1 is what strtotime() returned on failure before PHP 5.1
		if(!empty($data['timestamp']) && $data['timestamp'] != -1) break;
		if($struct->getField($field))
			$data['timestamp'] = strtotime($struct->getField($field));
	}//end foreach date fields

	// --- category (always an array once set) ---
	$data['category'] = $struct->getField('category');
	if(is_a($data['category'],'Outline')) {
		$cats = $data['category'];
		$data['category'] = array();
		foreach($cats->toArray() as $cat)
			$data['category'][] = $cat['text'];
	}//end if is_a Outline
	if($data['category'] && !is_array($data['category']))
		$data['category'] = array($data['category']);
	if(!$data['category'] && $struct->getField('dc:subject')) {
		$data['category'] = $struct->getField('dc:subject');
		if(is_a($data['category'],'Outline')) {
			$cats = $data['category'];
			$data['category'] = array();
			foreach($cats->toArray() as $cat)
				$data['category'][] = $cat['text'];
		} else {
			$data['category'] = explode(' ',$data['category']);
		}//end if-else $data['category'] is_a Outline
	}//end if ! category

	// --- image / logo ---
	$data['image'] = $struct->getField('image');
	if(is_a($data['image'],'Outline'))
		$data['image'] = $data['image']->toArray();
	if(!$data['image'])
		$data['image'] = $struct->getField('logo');
	if(is_a($data['image'],'Outline')) {
		if(!$data['image']->getField('href') && $data['image']->getNumNodes())
			$tmp = $data['image']->getNode(0);
		else
			$tmp = $data['image'];
		$data['image'] = array('url' => $tmp->getField('href'));
	}//end if is_a image Outline

	// --- items ---
	$data['items'] = array();
	foreach($struct->getNodes() as $node) {
		$item = array();

		// Flatten media:group so its children can be read directly from the item
		$tmp = $node->getField('media:group');
		if($tmp && is_a($tmp, 'Outline')) {
			foreach($tmp->getFields() as $name => $val)
				$node->addField($name, $val);
		}//end if media:group

		$item['title'] = $node->getField('title');
		if(!$item['title'])
			$item['title'] = $node->getField('dc:title');

		$item['link'] = $node->getField('link');
		if(is_a($item['link'],'Outline')) {
			$tmp = $item['link'];
			$item['link'] = $tmp->getField('href');
			if(!$item['link']) {
				foreach($tmp->getNodes() as $node2) {
					if($node2->getField('rel') == 'alternate' || $node2->getField('type') == 'text/html') {
						$item['link'] = $node2->getField('href');
						break;
					}//end if rel || type
				}//end foreach nodes
			}//end if ! $item['link']
		}//end if link is_a Outline
		if(!$item['link'])
			$item['link'] = $node->getField('href');

		// Prefer content:encoded when it is longer than the plain description
		$item['description'] = $node->getField('description');
		if(strlen($node->getField('content:encoded')) > strlen($item['description']))
			$item['description'] = $node->getField('content:encoded');
		if(!$item['description'])
			$item['description'] = $node->getField('dc:description');
		if(!$item['description'])
			$item['description'] = $node->getField('content');
		if(!$item['description'])
			$item['description'] = $node->getField('summary');
		if(!$item['description'])
			$item['description'] = $node->getField('abstract');

		$item['dc:creator'] = $node->getField('dc:creator');
		if(!$item['dc:creator'])
			$item['dc:creator'] = $node->getField('dc:contributor');
		$item['author'] = $node->getField('author');
		if(is_a($item['author'],'Outline')) {
			if(!$item['dc:creator']) $item['dc:creator'] = $item['author']->getField('name');
			$item['author'] = $item['author']->getField('email');
		}//end if author is_a Outline
		// "noemail@noemail.org (Name)" placeholder: keep the name as dc:creator, drop the fake address
		if(substr(trim($item['author']),0,19) == 'noemail@noemail.org') {
			$item['author'] = trim($item['author']);
			if(!$item['dc:creator']) {
				$item['dc:creator'] = substr($item['author'],21,strlen($item['author']));
				$item['dc:creator'] = substr($item['dc:creator'],0,strlen($item['dc:creator'])-1);
			}//end if !$item['dc:creator']
			unset($item['author']);
		}//end if noemail@noemail.org

		$item['category'] = $node->getField('category');
		if(is_a($item['category'],'Outline')) {
			$cats = $item['category'];
			$item['category'] = array();
			if(!$cats->getNumNodes())
				$cats = array($cats->toArray());
			else
				$cats = $cats->toArray();
			foreach($cats as $cat) {
				// Fall back to the 'term' attribute when there is no text
				if(empty($cat['text'])) $cat['text'] = isset($cat['term']) ? $cat['term'] : NULL;
				if(!$cat['text']) continue;
				$item['category'][] = $cat['text'];
			}//end foreach cats
		}//end if is_a Outline
		if($item['category'] && !is_array($item['category']))
			$item['category'] = array($item['category']);
		if(!$item['category'] && $node->getField('dc:subject')) {
			$item['category'] = $node->getField('dc:subject');
			if(is_a($item['category'],'Outline')) {
				$cats = $item['category'];
				$item['category'] = array();
				foreach($cats->toArray() as $cat)
					$item['category'][] = $cat['text'];
			} else {
				$item['category'] = explode(' ',$item['category']);
			}//end if-else dc:subject is_a Outline
		}//end if ! category
		if(!$item['category'] && $node->getField('media:keywords')) {
			$item['category'] = $node->getField('media:keywords');
			if(is_a($item['category'],'Outline')) {
				$cats = $item['category'];
				$item['category'] = array();
				foreach($cats->toArray() as $cat)
					$item['category'][] = $cat['text'];
			} else {
				$item['category'] = explode(', ',$item['category']);
			}//end if-else media:keywords is_a Outline
		}//end if ! category

		$item['comments'] = $node->getField('comments');

		$item['enclosure'] = $node->getField('enclosure');
		if(is_a($item['enclosure'],'Outline')) {
			$tmp = $item['enclosure'];
			// Swap the Outline for a plain array before writing offsets, as is done for 'source' below.
			// NOTE(review): assumes Outline does not implement ArrayAccess -- confirm against Outline.php
			$item['enclosure'] = array();
			$item['enclosure']['url'] = $tmp->getField('url');
			$item['enclosure']['length'] = $tmp->getField('length');
			$item['enclosure']['type'] = $tmp->getField('type');
		}//end if $item['enclosure'] is_a Outline

		// guid: explicit guid, then id, then link, finally a hash of title + description
		$item['guid'] = $node->getField('guid');
		if(is_a($item['guid'],'Outline'))
			$item['guid'] = $item['guid']->getField('text');
		if(!$item['guid'])
			$item['guid'] = $node->getField('id');
		if(!$item['guid'] && $item['link'])
			$item['guid'] = $item['link'];
		if(!$item['guid'])
			$item['guid'] = md5($item['title'].$item['description']);

		// timestamp: first date field that parses to a truthy value
		$item['timestamp'] = NULL;
		foreach(array('pubdate','dc:date','issued','created','published','updated','modified') as $field) {
			if($item['timestamp']) break;
			$item['timestamp'] = $node->getField($field) ? strtotime($node->getField($field)) : NULL;
		}//end foreach date fields

		$item['source'] = $node->getField('source');
		if(is_a($item['source'],'Outline')) {
			$tmp = $item['source'];
			$item['source'] = array();
			$item['source']['title'] = $tmp->getField('text');
			$item['source']['url'] = $tmp->getField('url');
		}//end if source is_a Outline
		if(!$item['source'] && $node->getField('dc:source'))
			$item['source'] = array('url' => $node->getField('dc:source')); // build the array outright; never index into a falsy scalar

		$item['wfw:comment'] = $node->getField('wfw:comment');
		$item['wfw:commentRss'] = $node->getField('wfw:commentrss');

		// Only Outline values can be converted; anything else is skipped rather than calling toArray() on it
		foreach(array('media:content','media:thumbnail') as $field) {
			$media = $node->getField($field);
			if(is_a($media,'Outline'))
				$item[$field] = $media->toArray();
		}//end foreach media fields

		array_push($data['items'],$item);
	}//end foreach nodes
	$data['items'] = array_values($data['items']);
	return $data;
}//end function std_feed_parse
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment