Skip to content

Instantly share code, notes, and snippets.

@ramsey
Created November 16, 2011 20:00
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ramsey/1371162 to your computer and use it in GitHub Desktop.
Save ramsey/1371162 to your computer and use it in GitHub Desktop.
Generate OPML file from Delicious blogroll tag
#!/usr/local/bin/php
<?php
/**
 * Generate an OPML blogroll from the del.icio.us bookmarks tagged "blogroll".
 *
 * Flow:
 *   1. Ask del.icio.us when the account was last updated. If the local cache
 *      is at least that recent, exit without doing anything.
 *   2. Otherwise download the tagged posts, validate them and refresh the cache.
 *   3. Fetch every bookmarked page, discover its feed with getRSSLocation()
 *      and write the resulting OPML document to $opml_file.
 *
 * Intended to be run from the command line (errors are reported on STDERR and
 * signalled with a non-zero exit status).
 */
include_once('getRSSLocation.php');

// del.icio.us username and password
$username = 'your_username';
$password = 'your_password';
$cache_file = '/tmp/delicious-blogroll.xml';
$opml_file = '/path/to/blogroll.opml';
$blogs = array();

// The credentials travel inside the URL, so percent-encode them; otherwise a
// password containing '@', ':' or '/' would corrupt the URL.
$api_base = 'https://' . rawurlencode($username) . ':' . rawurlencode($password)
    . '@api.del.icio.us/v1/posts/';

// STEP ONE: CACHE DEL.ICIO.US DATA
// determine whether an update has been made to del.icio.us since the
// last update; if so, then grab the results from del.icio.us and cache them
$update = simplexml_load_file($api_base . 'update');
if ($update === FALSE)
{
    fwrite(STDERR, "Unable to check del.icio.us for updates\n");
    exit(1);
}

$last_update = strtotime((string) $update['time']);
// a missing cache file simply means "never cached"
$cached_at = file_exists($cache_file) ? filemtime($cache_file) : FALSE;

if ($cached_at !== FALSE && ($last_update === FALSE || $last_update <= $cached_at))
{
    // if there have been no updates, then exit
    exit;
}

// del.icio.us has been updated since last cache; recache
$data = file_get_contents($api_base . 'all?tag=blogroll');
$blogroll = ($data === FALSE) ? FALSE : simplexml_load_string($data);
if ($blogroll === FALSE)
{
    // Leave the old cache (and its timestamp) untouched so that the next run
    // retries instead of treating a failed download as fresh data.
    fwrite(STDERR, "Unable to download the blogroll from del.icio.us\n");
    exit(1);
}
file_put_contents($cache_file, $data);

// STEP TWO: READ THE POSTS
foreach ($blogroll->post as $post)
{
    // cast to string: attribute access yields SimpleXMLElement objects, which
    // must not be sorted or passed around as if they were plain strings
    $blogs[] = array(
        'name' => (string) $post['description'],
        'href' => (string) $post['href']
    );
}

// sort by name
$name = array();
foreach ($blogs as $k => $v)
{
    $name[$k] = $v['name'];
}
array_multisort($name, SORT_ASC, SORT_STRING, $blogs);

// STEP THREE: GENERATE OPML
// The document is assembled in memory rather than through output buffering so
// that warnings raised while fetching blogs can never end up inside the OPML.
$lines = array(
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<opml version="1.0">',
    '<head>',
    '<title>PHP Blogroll</title>',
    '<expansionState/>',
    '</head>',
    '<body>',
    '<outline text="php">'
);

foreach ($blogs as $blog)
{
    $html = file_get_contents($blog['href']);
    if ($html === FALSE)
    {
        // blogs that cannot be fetched are left out of the blogroll
        continue;
    }

    // discover the blog's RSS feed
    $xmlUrl = getRSSLocation($html, $blog['href']);

    // htmlspecialchars() only emits entities that are valid in XML;
    // htmlentities() would produce HTML-only names such as &eacute;
    $text = htmlspecialchars($blog['name'], ENT_QUOTES, 'UTF-8');

    $outline = ' '; // line things up cleanly in output
    $outline .= '<outline ';
    $outline .= 'text="' . $text . '" ';
    $outline .= 'htmlUrl="' . htmlspecialchars($blog['href'], ENT_QUOTES, 'UTF-8') . '" ';
    $outline .= 'title="' . $text . '"';
    if ($xmlUrl)
    {
        $outline .= ' type="rss"';
        $outline .= ' xmlUrl="' . htmlspecialchars($xmlUrl, ENT_QUOTES, 'UTF-8') . '"';
    }
    $outline .= '/>';

    $lines[] = $outline;
}

$lines[] = '</outline>';
$lines[] = '</body>';
$lines[] = '</opml>';

// save the OPML to file
$opml = implode("\n", $lines) . "\n";
if (file_put_contents($opml_file, $opml) === FALSE)
{
    fwrite(STDERR, "Unable to write {$opml_file}\n");
    exit(1);
}
?>
<?php
/**
 * Discover the RSS/Atom feed advertised by an HTML page.
 *
 * This is a cleaned up and modified version of Keith Deven's getRSSLocation()
 * function, which can be found at:
 * http://keithdevens.com/weblog/archive/2002/Jun/03/RSSAuto-DiscoveryPHP
 *
 * Cleaned up by Ben Ramsey, http://benramsey.com
 *
 * The markup is scanned for <link> tags. The first tag whose rel contains
 * "alternate", whose type is application/rss+xml, application/atom+xml or
 * text/xml, and which has a non-empty href wins. Its href is returned as an
 * absolute URL, resolved against $location when it is relative.
 *
 * @param string $html     HTML source of the page to inspect
 * @param string $location URL the HTML was fetched from; used as the base for
 *                         relative feed links
 * @return string|false    the absolute feed URL, or FALSE when either argument
 *                         is empty or no usable feed link is found
 */
function getRSSLocation($html, $location)
{
    if (!$html || !$location)
    {
        return FALSE;
    }

    // callers may hand over string-like objects (e.g. SimpleXMLElement)
    $html = (string) $html;
    $location = (string) $location;

    $feed_types = array('application/rss+xml', 'application/atom+xml', 'text/xml');

    // search through the HTML and grab the attribute text of every <link> tag
    if (!preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html, $tags))
    {
        return FALSE;
    }

    $url_parts = parse_url($location);

    foreach ($tags[1] as $tag)
    {
        // Start from an empty array for every tag; otherwise attributes of an
        // earlier <link> would leak into later ones that lack them.
        $link = array();

        // name=value pairs; the value may be double-quoted, single-quoted or
        // bare, and quoted values may contain whitespace. The (?| ... ) branch
        // reset keeps the value in group 2 whichever alternative matched.
        preg_match_all(
            '/([^\s=\'"<>\/]+)\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/',
            $tag,
            $attributes,
            PREG_SET_ORDER
        );
        foreach ($attributes as $attribute)
        {
            $value = isset($attribute[2]) ? $attribute[2] : '';
            // attribute values are HTML-encoded in the source (e.g. &amp;)
            $link[strtolower($attribute[1])] = html_entity_decode($value, ENT_QUOTES, 'UTF-8');
        }

        // now figure out whether this one points to the RSS file
        $rel  = isset($link['rel']) ? strtolower($link['rel']) : '';
        $type = isset($link['type']) ? strtolower(trim($link['type'])) : '';
        $href = isset($link['href']) ? trim($link['href']) : '';

        // rel is a whitespace-separated list of tokens
        $rel_tokens = preg_split('/\s+/', $rel, -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('alternate', $rel_tokens, TRUE)
            || !in_array($type, $feed_types, TRUE)
            || $href === '')
        {
            continue;
        }

        // absolute URL with any scheme (http://, https://, ...)
        if (preg_match('/^[a-z][a-z0-9+.\-]*:\/\//i', $href))
        {
            return $href;
        }

        // otherwise, 'absolutize' it against the page's own URL
        if ($url_parts === FALSE || !isset($url_parts['host']))
        {
            // no host to resolve against; a later absolute link may still work
            continue;
        }

        $scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : 'http';

        if (strpos($href, '//') === 0)
        {
            // protocol-relative link: only the scheme is inherited
            return $scheme . ':' . $href;
        }

        $full_url = $scheme . '://' . $url_parts['host'];
        if (isset($url_parts['port']))
        {
            $full_url .= ':' . $url_parts['port'];
        }

        $path = (isset($url_parts['path']) && $url_parts['path'] !== '')
            ? $url_parts['path']
            : '/';

        if ($href[0] === '?')
        {
            // query-only link: keep the page's full path
            $full_url .= $path;
        }
        elseif ($href[0] !== '/')
        {
            // it's a relative link on the path: keep everything up to and
            // including the last '/', so "/blog/" stays "/blog/" and
            // "/blog/index.html" becomes "/blog/"
            $last_slash = strrpos($path, '/');
            $full_url .= ($last_slash === FALSE) ? '/' : substr($path, 0, $last_slash + 1);
        }

        return $full_url . $href;
    }

    return FALSE;
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment