Skip to content

Instantly share code, notes, and snippets.

@dvrensk
Created February 16, 2018 11:10
Show Gist options
  • Save dvrensk/a60e2c91cc863c7577071c758f4ba620 to your computer and use it in GitHub Desktop.
Save dvrensk/a60e2c91cc863c7577071c758f4ba620 to your computer and use it in GitHub Desktop.
Script to download all screencasts from ElixirSips using the RSS feed. Should work for other podcast feeds too.
#! /usr/bin/env ruby
require "nokogiri"
require "date"
require "fileutils"
require "pry"
# Downloads the media attached to every <item> of an RSS feed.
#
# The feed is read from "feed.xml" in the current working directory. Each
# item gets its own directory (named from its publication date and title)
# holding the item's description as "episode.html" plus, when the item has
# an <enclosure>, the enclosed file fetched with curl.
#
# Credentials are taken from the UPASS environment variable and handed to
# curl's "-u" option, i.e. "email@address.net:p4ssw0rd".
class GetAll
  # "user:password" string passed to curl; nil when the variable is unset.
  UPASS = ENV["UPASS"]

  attr_reader :node

  # Entry point: verifies that credentials are configured, then processes
  # every item of the feed.
  def self.run
    check_upass
    fetch_all
  end

  # Raises RuntimeError when UPASS is missing or empty.
  def self.check_upass
    raise "Define UPASS=email@address.net:p4ssw0rd in the environment" if UPASS.to_s.empty?
  end

  # Parses feed.xml and fetches each /rss/channel/item in document order.
  def self.fetch_all
    # Block form so the file handle is closed once the document is parsed.
    xml = File.open("feed.xml") { |feed| Nokogiri.XML(feed) }
    xml.xpath("/rss/channel/item").each do |item|
      new(item).fetch
    end
  end

  # node - the feed item; it must respond to at(name), returning an element
  #        that responds to text (and, for the enclosure, to [attribute]).
  def initialize(node)
    @node = node
  end

  # Creates the item's directory, stores the description there as
  # episode.html and downloads the item's media into it.
  def fetch
    FileUtils.mkdir_p dir
    Dir.chdir dir do
      File.write("episode.html", description)
      if enclosure
        fetch_per_enclosure
      else
        puts "No enclosure for #{dir}, looking at text content…"
        fetch_from_links
      end
    end
  end

  # Downloads the enclosure into the current directory unless a file with
  # the same name and the advertised length is already there.
  def fetch_per_enclosure
    url = enclosure["url"]
    size = enclosure["length"].to_i
    name = File.basename(url)
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    return if File.exist?(name) && File.size(name) == size

    puts "Fetching #{name} to #{dir}:"
    # system returns false/nil when curl exits non-zero or cannot be run;
    # report it instead of silently moving on to the next item.
    warn "*** curl failed for #{url}" unless system("curl", "-u", UPASS, "-kOL", url)
  end

  # Fallback for items without an enclosure: looks for a single link in the
  # description's <ul> lists whose text ends in ".mp4" and reports where it
  # would be downloaded from. The download itself is disabled.
  def fetch_from_links
    html = Nokogiri.HTML(description)
    first, *more = html.xpath("//ul/li/a").select { |link| link.text =~ /\.mp4$/ }
    if first.nil?
      puts "Nothing in content either; skipping"
      return
    end
    if more.any?
      puts "*** Found #{1 + more.size} mp4s; don't know what to do!"
      return
    end

    name = first.text
    # to_s guards against an anchor without an href attribute.
    file_id = first["href"].to_s[/file_id=(\d+)/, 1]
    url = "https://elixirsips.dpdcart.com/subscriber/download?file_id=#{file_id}"
    puts "Fetching #{name} from #{url} to #{dir}:"
    puts ">>> No, not actually since it never seems to work"
    # system "curl", "-u", UPASS, "-kLo", name, url
  end

  # Publication timestamp parsed from the item's <pubDate>.
  def date
    DateTime.parse(node.at("pubDate").text)
  end

  # Lower-cased title with every run of characters other than a-z, 0-9,
  # "-" and "." collapsed into a single underscore.
  def clean_name
    node.at("title").text.downcase.gsub(/[^-a-z0-9.]+/, "_")
  end

  # Directory name for the item: "YYYY-MM-DD-<clean_name>".
  def dir
    [date.strftime("%Y-%m-%d"), clean_name].join("-")
  end

  # Text of the item's <description>.
  def description
    node.at("description").text
  end

  # The item's <enclosure> element, or nil when it has none.
  def enclosure
    node.at("enclosure")
  end
end
# Run the downloader only when this file is executed directly, not when it
# is loaded with require/load.
GetAll.run if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment