Skip to content

Instantly share code, notes, and snippets.

@andrewharvey
Created July 15, 2011 10:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewharvey/1084426 to your computer and use it in GitHub Desktop.
Save andrewharvey/1084426 to your computer and use it in GitHub Desktop.
Generates an RSS feed with mp4 links for SBS's Tour de France Coverage
#!/usr/bin/perl
# SBS Playlist To RSS - v0.2.0
# This script will download the ajax xml file containing the latest full episode videos added to the SBS.com.au site and convert this data into an RSS feed format.
# Originally adapted from the code at http://www.perl.com/pub/a/2001/11/15/creatingrss.html by Chris Ball.
# I declare this code to be in the public domain.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
use strict;
use warnings;
use LWP::Simple; #for downloading over HTTP
use HTML::TokeParser; #to parse HTML files
use XML::RSS; #to generate the RSS file
use Date::Format;
use XML::Mini::Document; #to grab data from XML files
# User-configurable setting: the numeric id of the SBS playlist to convert.
# Known ids: 75 = Tour de France, 94 = full episodes, 95 = sneak peeks.
my $playlist = 75;

# Constants.
# Human-readable label for each known playlist id; the label is used for the
# feed title and (lower-cased, whitespace removed) for the output file name.
my %playlist_hash = (
    75 => 'Tour de France',
    94 => 'Latest Full Episodes',
    95 => 'Latest Sneek Peek',
);

# Ajax endpoint that returns the markup listing the videos of the playlist.
my $playlisturl = "http://www.sbs.com.au/shows/ajax/getplaylist/playlistId/$playlist/";

# Prefix of the per-episode SMIL index URL; the episode code is appended to it.
my $smil_baseurl = 'http://player.sbs.com.au/video/smil/index/standalone/';
# Prepare some things...
# Download the playlist markup with LWP::Simple's get(). get() reports failure
# by returning undef and does not set $!, so die with a message that names the
# URL instead of printing an unrelated (or empty) system error.
my $playlistxml = get( $playlisturl );
die "Failed to download the playlist from $playlisturl\n"
    unless defined $playlistxml && length $playlistxml;

# Create a TokeParser object that tokenises the downloaded markup in memory.
my $playlistxml_stream = HTML::TokeParser->new( \$playlistxml )
    or die "Failed to create an HTML parser for the playlist from $playlisturl\n";
# Create the RSS object (RSS 2.0 output).
my $rss = XML::RSS->new( version => '2.0' );

# Prep the RSS: channel-level metadata.
$rss->channel(
    title    => "SBS ".$playlist_hash{$playlist},
    link     => $playlisturl,
    language => 'en',
    # The element is spelled lastBuildDate; the previous key "lastBulidDate"
    # was a typo. The format string labels the time as GMT, so pass the 'GMT'
    # zone to time2str -- without it the local time would be formatted.
    lastBuildDate => time2str("%a, %d %b %Y %T GMT", time, 'GMT'),
);

# Channel image: the SBS logo, linking back to the playlist.
$rss->image(
    title => "SBS ".$playlist_hash{$playlist},
    url   => "http://www.sbs.com.au/web/images/sbslogo_footer.jpg",
    link  => $playlisturl
);
# Human-readable episode type for each three-letter filename prefix
# (SRS|DOC|MOV). Built once, outside the loop, because it never changes.
my %epcode_hash = (
    'DOC' => 'Documentary',
    'MOV' => 'Movie',
    'SRS' => 'Series',
);

# Walk the playlist markup and add one RSS item per video.
# get_tag skips forward in the HTML from our current position to the tag
# specified, and get_trimmed_text grabs plaintext from the current position up
# to the end position specified.
ITEM:
while ( my $tag = $playlistxml_stream->get_tag("a") ) {
    # Only an <a> tag carrying a "title" attribute introduces a video item.
    my $eptitle = $tag->[1]{title};    # Episode title
    next ITEM unless $eptitle;

    # The next step is the <img> holding the episode thumbnail. Running out of
    # markup here means there is nothing left to process.
    my $img_tag = $playlistxml_stream->get_tag('img') or last ITEM;
    my $epthumb = $img_tag->[1]{src};  # URL of the episode thumbnail image

    # Skip the following <a>, move to the <p>, and read the time and date as
    # the text up to the closing </p>. All stream reads for this item happen
    # before any validation or network access, so skipping an item below never
    # leaves the parser positioned in the middle of an item.
    $playlistxml_stream->get_tag('a');
    $playlistxml_stream->get_tag('p');
    my $eptime = $playlistxml_stream->get_trimmed_text('/p');

    # The description is HTML wrapped in CDATA, so ampersands in the text we
    # insert must be escaped. The title gets the same treatment as the time;
    # the unescaped title is still used for the item's <title>.
    $eptime =~ s/&/&amp;/g;
    ( my $eptitle_html = $eptitle ) =~ s/&/&amp;/g;

    unless ( defined $epthumb && length $epthumb ) {
        warn "Skipping \"$eptitle\": its thumbnail has no src attribute\n";
        next ITEM;
    }

    # Derive the video file name and the episode code from the thumbnail URL,
    # eg,
    #  $epthumb       = http://videocdn.sbs.com.au/u/thumbnails/SRS_FE_Global_Village_Ep_19_44_48467.jpg
    #  $filename_base = SRS_FE_Global_Village_Ep_19_44_48467
    #  $epcode        = 48467
    #FIXME: grab from smil index not thumb!
    my $filename_base = substr( $epthumb, rindex( $epthumb, "/" ) + 1 );
    $filename_base =~ s/\.[^.]*\z//;    # drop the file extension, whatever its length
    my $epcode = substr( $filename_base, rindex( $filename_base, "_" ) + 1 );

    # Now look up the episode format (RTMP, HTTP) and file details in the
    # episode's SMIL index. get() returns undef when the download fails.
    my $smilxml = get( $smil_baseurl.$epcode );
    unless ( defined $smilxml && length $smilxml ) {
        warn "Skipping \"$eptitle\": could not download $smil_baseurl$epcode\n";
        next ITEM;
    }
    my $xmlDoc = XML::Mini::Document->new();
    $xmlDoc->parse($smilxml);
    my $xmlHash = $xmlDoc->toHash();

    # The eval keeps an unexpected document shape (for example "meta" not
    # being a single hash) from aborting the whole run.
    # eg. "http://videocdn.sbs.com.au/u/video/" or "rtmp://specialbsc.fcod.llnwd.net/a1768/o21/"
    my $baseurl = eval { $xmlHash->{smil}{head}{meta}{base} };
    unless ( defined $baseurl && !ref $baseurl && length $baseurl ) {
        warn "Skipping \"$eptitle\": no base URL found in $smil_baseurl$epcode\n";
        next ITEM;
    }

    # First four characters of the base URL, compared case-insensitively.
    my $delivery_protocol = lc substr( $baseurl, 0, 4 );    # "rtmp" or "http"

    # Items delivered over any other protocol are left out of the feed.
    next ITEM unless $delivery_protocol eq "http" || $delivery_protocol eq "rtmp";

    my $url128  = $baseurl.$filename_base."_128K.mp4";     # URL of 128K episode
    my $url300  = $baseurl.$filename_base."_300K.mp4";     # URL of 300K episode
    my $url1000 = $baseurl.$filename_base."_1000K.mp4";    # URL of 1000K episode

    # Type of episode: three-letter prefix (SRS, DOC...) mapped to its full
    # name (Series, Documentary...); empty when the prefix is unknown so that
    # no uninitialized-value warning is raised.
    my $code1char = substr( $filename_base, 0, 3 );
    my $code1 = exists $epcode_hash{$code1char} ? $epcode_hash{$code1char} : "";

    # Both delivery kinds share the same description; only the protocol label
    # differs. Lines are joined with "\n" to keep the generated text unchanged.
    my $protocol_label = uc $delivery_protocol;
    my $description = join "\n",
        qq{<![CDATA[<img src="$epthumb" width="100" height="56" /><br />},
        qq{$eptitle_html<br />},
        qq{$eptime<br />},
        qq{Delivery: FLV over <b>$protocol_label</b><br />},
        qq{Links: <a href="$url128">128k</a>, <a href="$url300">300k</a>, <a href="$url1000">1000k</a><br />},
        qq{Type: $code1<br />]]>};

    my %item = (
        title       => $eptitle,
        permaLink   => $smil_baseurl.$epcode,
        description => $description,
    );

    # Only HTTP-delivered episodes get an enclosure; RTMP items have none.
    # The enclosure points at an .mp4 file, so advertise it as video/mp4.
    if ( $delivery_protocol eq "http" ) {
        $item{enclosure} = { url => $url1000, type => "video/mp4" };
    }

    # Add the item to the RSS feed.
    $rss->add_item(%item);
}
# Alternative outputs, left disabled:
#print "Content-Type: application/xml; charset=ISO-8859-1"; # header that helps a browser display the feed
#print $rss->as_string; # send the RSS XML feed to stdout instead of a file

# Build the output file name from the playlist label: lower-cased with all
# whitespace removed, wrapped as "sbs<label>.rss".
( my $rssfilename = lc($playlist_hash{$playlist}) ) =~ s/\s+//g;

# Save the RSS XML feed to that file.
$rss->save("sbs${rssfilename}.rss");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment