Skip to content

Instantly share code, notes, and snippets.

@JacobDB
Created March 15, 2016 19:26
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save JacobDB/1556b2e92b84291dfdd8 to your computer and use it in GitHub Desktop.
Save JacobDB/1556b2e92b84291dfdd8 to your computer and use it in GitHub Desktop.
Web scraper to convert TuneGenie into RSS
<?php
/**
 * Scrapes the TuneGenie playlist page at $url and re-publishes it as an RSS 2.0 feed.
 *
 * The channel header is always written. If the page cannot be fetched or parsed,
 * the channel is emitted without items so the response stays well-formed XML
 * instead of aborting half-way through the document.
 *
 * Each feed item is built from an element with class "left" that is a direct child
 * of an element with class "large-9 small-8 columns hidden-on-close":
 *   - child node #2 supplies the item title (and, via its first child element
 *     carrying an href, the item link),
 *   - child node #4 supplies the item description.
 */
$url = "http://wkqx.tunegenie.com/";
$title = "WKQX RSS Feed";
$description = "A constantly updated feed for WKQX's playlist";
$userAgent = "Googlebot/2.1 (http://www.googlebot.com/bot.html)";

// Relative hrefs found on the page are resolved against the site root.
$baseUrl = rtrim($url, "/");

// Escapes arbitrary text so it is safe to place inside an XML element.
$escapeXml = function ($text) {
    return htmlspecialchars((string) $text, ENT_XML1 | ENT_COMPAT | ENT_SUBSTITUTE, "UTF-8");
};

header("Content-type: text/xml; charset=utf-8", true);
echo "<?xml version='1.0' encoding='UTF-8' ?>" . PHP_EOL;
echo "<rss version='2.0'>" . PHP_EOL;
echo "<channel>" . PHP_EOL;
echo "<title>" . $escapeXml($title) . "</title>" . PHP_EOL;
echo "<link>" . $escapeXml($url) . "</link>" . PHP_EOL;
echo "<description>" . $escapeXml($description) . "</description>" . PHP_EOL;

// Fetch the playlist page; $html stays false when the request cannot be made.
$html = false;
$curl = curl_init($url);
if ($curl !== false) {
    curl_setopt($curl, CURLOPT_USERAGENT, $userAgent);
    curl_setopt($curl, CURLOPT_AUTOREFERER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_TIMEOUT, 2);
    $html = curl_exec($curl);
    curl_close($curl);
}

// Only parse when a non-empty body came back: curl_exec() returns false on failure,
// and DOMDocument::loadHTML() rejects an empty string.
if (is_string($html) && trim($html) !== "") {
    // Turn every non-ASCII character into a numeric entity so the HTML parser does
    // not have to guess the encoding (replaces the deprecated "HTML-ENTITIES" target).
    $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), "UTF-8");

    // Collect markup warnings internally instead of silencing everything with "@".
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $rows = false;
    if ($loaded) {
        $xpath = new DOMXPath($dom);
        $rows = $xpath->query('//*[@class="large-9 small-8 columns hidden-on-close"]/*[@class="left"]');
    }

    if ($rows !== false) {
        foreach ($rows as $row) {
            // Positions are counted over all child nodes (text nodes included).
            $titleNode = $row->childNodes->item(1);
            $descriptionNode = $row->childNodes->item(3);

            // A row without a title node would produce an empty <item>; skip it.
            if ($titleNode === null) {
                continue;
            }

            // An item may carry only one <link>: use the first child element with an href.
            $link = null;
            if ($titleNode instanceof DOMElement) {
                foreach ($titleNode->childNodes as $child) {
                    if ($child instanceof DOMElement && $child->hasAttribute("href")) {
                        $href = $child->getAttribute("href");
                        // Leave already-absolute URLs alone; prefix everything else.
                        $link = preg_match('#^https?://#i', $href) ? $href : $baseUrl . $href;
                        break;
                    }
                }
            }

            echo "<item>" . PHP_EOL;
            echo "<title>" . $escapeXml(trim((string) $titleNode->nodeValue)) . "</title>" . PHP_EOL;
            if ($link !== null) {
                echo "<link>" . $escapeXml($link) . "</link>" . PHP_EOL;
            }
            if ($descriptionNode !== null) {
                echo "<description>" . $escapeXml(trim((string) $descriptionNode->nodeValue)) . "</description>" . PHP_EOL;
            }
            echo "</item>" . PHP_EOL;
        }
    }
}

echo "</channel></rss>";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment