Last active
December 10, 2015 13:59
-
-
Save adrianshort/4444819 to your computer and use it in GitHub Desktop.
Cheam North and Worcester Park local committee podcast feed creator. Scrapes the webpage and outputs an iTunes-friendly podcast RSS feed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape webpage into a podcast RSS feed
# https://www.sutton.gov.uk/index.aspx?articleid=4332
#
# Reads the page named by `url`, walks the children of its #bodytext element,
# and treats any child whose text contains a "10 December 2012"-style date as
# the heading for the MP3 links that follow it. Every <a> whose href ends in
# .mp3 becomes one feed item. The finished RSS document is printed to stdout.
require 'nokogiri'
require 'open-uri'
require 'time'
require 'pp'

FEED_TITLE = "Cheam North and Worcester Park Local Committee"
FEED_IMAGE = "https://dl.dropbox.com/u/300783/logo.png"
FEED_AUTHOR = "London Borough of Sutton"
FEED_LINK = "https://www.sutton.gov.uk/index.aspx?articleid=4332"

# Date as it appears in a meeting heading, eg 10 December 2012
MEETING_DATE_PATTERN = /\d{1,2}\s+\w+\s+\d{4}/
# \z anchors at the very end of the href; $ would also match before a newline.
MP3_HREF_PATTERN = /\.mp3\z/i
# Successive recordings under one heading are stamped 30 minutes apart,
# presumably so the feed keeps them in page order.
ITEM_SPACING_SECONDS = 30 * 60

url = "cnwp.html"

# Block/one-shot forms close the underlying handle. URI.open is the supported
# way to fetch remote documents (Kernel#open stopped handling URLs in Ruby 3.0);
# local paths are read straight from disk.
html = if url.match?(%r{\A(?:https?|ftp)://}i)
  URI.open(url, &:read)
else
  File.read(url)
end
doc = Nokogiri.parse(html)

body = doc.at("#bodytext")
abort "No #bodytext element found in #{url}" if body.nil?

meeting = nil # date text of the most recent meeting heading seen
items = []
items_this_meeting = 0

body.children.each do |node|
  # Keep only the matched date, not the node's whole text, so any surrounding
  # words cannot confuse Time.parse further down.
  date_text = node.inner_text[MEETING_DATE_PATTERN]
  if date_text
    meeting = date_text
    items_this_meeting = 0
  end

  node.children.each do |subnode|
    next unless subnode.name == 'a'

    # An <a> without an href (eg a named anchor) yields nil; to_s makes it "".
    # Strip before matching so padded href values are still recognised.
    href = subnode['href'].to_s.strip
    next unless href.match?(MP3_HREF_PATTERN)

    if meeting.nil?
      # Without a preceding dated heading there is nothing to derive pubDate from.
      warn "Skipping #{href}: no meeting date heading precedes it"
      next
    end

    items << {
      :d => Time.parse(meeting) + (items_this_meeting * ITEM_SPACING_SECONDS),
      :href => href,
      :title => subnode.inner_text.strip
    }
    items_this_meeting += 1
  end
end

# Build the RSS 2.0 document with the iTunes namespace declared on the root.
builder = Nokogiri::XML::Builder.new do |xml|
  xml.rss('xmlns:itunes' => "http://www.itunes.com/dtds/podcast-1.0.dtd",
          :version => "2.0") do
    xml.channel do
      xml.title FEED_TITLE
      xml.link FEED_LINK
      xml['itunes'].image(:href => FEED_IMAGE)
      xml['itunes'].author FEED_AUTHOR

      items.each do |i|
        xml.item do
          xml.title i[:title]
          xml['itunes'].author FEED_AUTHOR
          xml.enclosure(
            :url => i[:href],
            :type => "audio/mpeg"
          )
          # The recording's URL doubles as the item's unique identifier.
          xml.guid i[:href]
          xml.pubDate i[:d].rfc822
        end
      end
    end
  end
end

puts builder.to_xml
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment