Last active
May 18, 2017 23:58
-
-
Save henrik/10779 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# SweDB XMLTV Grabber for EyeTV | |
# by Henrik Nyh <http://henrik.nyh.se> under the MIT License. | |
# | |
# INSTALLATION | |
# | |
# Configure the list of channels below and run this script as a cron job, e.g. | |
# | |
# 0 */12 * * * ruby /path/to/this/script.rb | |
# to run it every 12 hours (suggested since cron jobs don't run if the computer is off). | |
# | |
# Every time it runs it will get schedules for those channels and import them into EyeTV. | |
# The script uses conditional GET to only update files if they've changed. | |
# If you're on OS X Mavericks, you probably need to run this for the script to work: | |
# | |
# xcode-select --install | |
# sudo gem install hpricot | |
# | |
# If you're on Lion or Mountain Lion, install Xcode and its command-line tools and then run: | |
# | |
# sudo gem install hpricot | |
# | |
# On Snow Leopard, it should just work. | |
# | |
# CHANGELOG | |
# | |
# 2015-06-17 1.6: New root URL. | |
# 2013-10-25 1.5: Unbreak on Mavericks. | |
# 2012-01-14 1.4: cron example is once per 12 hours, not once a minute every 12th hour. | |
# 2009-05-05 1.3: MIT License. | |
# 2008-10-30 1.2: EyeTV no longer steals focus on import. | |
# 2008-09-15 1.1: Always update channel list; not just if updated on server. Don't break on 404. | |
# 2008-03-09 1.0: First version. | |
# Use ids as listed in http://xmltv.tvsajten.com/channels.xml.gz. | |
# Channel ids to grab; use ids as listed in http://xmltv.tvsajten.com/channels.xml.gz.
# Frozen so the constant cannot be mutated accidentally at runtime.
CHANNELS = %w[
  kanal9.se
  hd.svt.se
  history.viasat.se
  world.discovery.com
  nordic.travel.discovery.com
  ztv.se
  nature.viasat.se
  showtime.nonstop.tv
  silver.nonstop.tv
].freeze
%w[zlib open-uri date fileutils rubygems hpricot].each {|lib| require lib } | |
DAYS = 15 # A value from 1 to 15. Fetches yesterday + n days.
          # We need yesterday for today's early morning shows.
ROOT_URL = "http://xmltv.tvsajten.com/channels.xml.gz".freeze
USER_AGENT = "SweDB XMLTV Grabber for EyeTV/1.6 <henrik@nyh.se>".freeze
DIRECTORY = "/Library/Application Support/EyeTV/XMLTV".freeze
DB = File.join(DIRECTORY, "condget.db").freeze
# Matches schedule filenames like "kanal9.se_2017-05-18.xml", capturing the date.
SCHEDULE_RE = /_(\d{4}-\d{2}-\d{2})\.xml$/
YESTERDAY = Date.today - 1
# Load or create DB hash mapping file paths to Conditional GET modification dates.
# Using file paths instead of URLs since this makes removing out-of-date entries easier.
# If the DB file is missing or unreadable, start over with an empty hash whose
# default value is "" (an empty If-Modified-Since header).
$db =
  begin
    File.open(DB) { |file| Marshal.load(file) }
  rescue StandardError
    Hash.new("")
  end
# This method does conditional GET based on Last-Modified/If-Modified-Since only
# (no etag) and assumes gzipped content. The gunzipped body is written to the
# local path derived from the URL, and the server's Last-Modified header is
# recorded in $db keyed by that local path.
# Returns the local file path if new content was retrieved, false otherwise
# (i.e. on 304 Not Modified or 404 Not Found).
def condget(url)
  path = path_from_url(url)
  open(url, "If-Modified-Since" => $db[path], "User-Agent" => USER_AGENT) do |response|
    # Store last-modified for the next conditional request.
    $db[path] = response.meta["last-modified"]
    # Gunzip; fall back to the raw body if the server sent it uncompressed.
    begin
      response_body = Zlib::GzipReader.new(response).read
    rescue Zlib::GzipFile::Error
      response.rewind
      response_body = response.read
    end
    File.open(path, 'w') {|file| file.write response_body }
    path
  end
rescue OpenURI::HTTPError => e
  # 304 and 404 are expected outcomes; anything else should abort the run.
  raise unless e.message =~ /^(304|404)/
  false
end
# Map a remote .gz URL to the local (ungzipped) file path under DIRECTORY.
def path_from_url(url)
  File.join(DIRECTORY, File.basename(url, ".gz"))
end
# Remove expired schedules: delete any schedule file dated before yesterday,
# along with its conditional-GET bookkeeping entry.
Dir[File.join(DIRECTORY, "*.xml")].each do |file|
  match = SCHEDULE_RE.match(file)
  next unless match && Date.parse(match[1]) < YESTERDAY
  File.delete(file)
  $db.delete(file)
end
# Update scheduling: collect paths of every file changed this run so they can
# be handed to EyeTV at the end.
updates = []
# Create directory structure if it doesn't exist already.
FileUtils.mkdir_p DIRECTORY
# Refresh the local copy of the root channel list (a no-op on 304), then parse
# the local file. The return value of condget isn't needed here since we always
# re-filter the channel list below.
condget(ROOT_URL)
doc = Hpricot(open(path_from_url(ROOT_URL)))
# Create active_channels.xml containing only the requested channels:
# remove every <channel> element whose id is not in CHANNELS.
excluded_channels = doc.search('channel').reject {|channel| CHANNELS.include?(channel[:id]) }
Hpricot::Elements[*excluded_channels].remove
active_channels_file = File.join(DIRECTORY, "active_channels.xml")
active_channels_xml = doc.to_s.gsub(/\n\s+\n/, "\n") # get rid of excess whitespace
File.open(active_channels_file, "w") {|file| file.write active_channels_xml }
updates << active_channels_file
# Loop over the remaining (active) channels and fetch one schedule file per day,
# from yesterday through yesterday + DAYS.
channels = doc.search('channel').map { |channel| [channel[:id], channel.at('base-url').inner_text] }
channels.each do |id, base_url|
  (YESTERDAY..YESTERDAY + DAYS).each do |day|
    url = File.join(base_url, "#{id}_#{day}.xml.gz")
    path = condget(url)
    updates << path if path
  end
end
# Persist the conditional GET DB hash to disk for the next run.
File.open(DB, "w") { |file| Marshal.dump($db, file) }
# Do the actual EyeTV import; -g keeps EyeTV from stealing focus.
system("open", "-ga", "EyeTV", *updates)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Someone reported the following error – perhaps someone else is seeing the same one: