Skip to content

Instantly share code, notes, and snippets.

@joeyrobert
Last active September 27, 2017 04:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joeyrobert/8e05d7dcfba314afec2d611670c90f48 to your computer and use it in GitHub Desktop.
Save joeyrobert/8e05d7dcfba314afec2d611670c90f48 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# gem install xml-simple psych
require 'xmlsimple'
require 'psych'
require 'open-uri'
require 'fileutils'
require 'yaml'
require 'thread'
require 'net/https'
# Leave immediately on Ctrl-C; exit! skips at_exit handlers
Signal.trap('INT') { exit! }
# Print an example configuration, rendered as YAML, to standard output
def example
  sample_config = {
    'Railscasts' => {
      'path' => '~/podcasts/railscasts',
      'url' => 'http://feeds.feedburner.com/railscasts'
    },
    'The Changelog' => {
      'path' => '~/podcasts/thechangelog',
      'url' => 'http://feeds.feedburner.com/thechangelog'
    }
  }
  # The YAML dump already ends in a newline, so puts adds nothing after it
  puts "Example:", sample_config.to_yaml
end
# Exit if the ./podcasts.yml file is missing, after showing an example config.
# File.exist? is used because the File.exists? alias was removed in Ruby 3.2.
unless File.exist?(File.expand_path("./podcasts.yml"))
  puts "Please create your ./podcasts.yml file."
  example
  exit
end
# Load and parse the ./podcasts.yml file. Show the example config and exit if
# it is not valid YAML or does not hold a mapping of podcast names to settings.
begin
  document = YAML.parse_file(File.expand_path("./podcasts.yml"))
  # parse_file returns false for an empty file, so guard before converting
  podcasts = document && document.to_ruby
rescue Psych::SyntaxError
  podcasts = nil
end
# The rest of the script iterates name => config pairs, so anything other
# than a Hash (empty file, list, scalar) is treated as a parse problem.
unless podcasts.is_a?(Hash)
  puts "There was a problem parsing your ./podcasts.yml file."
  example
  exit
end
# download_queue holds the episodes still to fetch; stats_queue carries the
# progress messages that the download workers emit.
download_queue, stats_queue = Queue.new, Queue.new
# NOTE(review): VERIFY_NONE turns off TLS certificate verification for every
# request made with these options - confirm this is intentional.
ssl_options = { ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE }
# Loop through the podcasts and queue every episode that is not on disk yet
podcasts.each do |name, config|
  puts "Fetching #{name}"
  begin
    # Fetch rss feed. URI.open is needed because Kernel#open stopped handling
    # URLs in Ruby 3.0, even with open-uri loaded.
    xml = URI.open(config['url'], ssl_options).read
    # Parse the rss
    feed = XmlSimple.xml_in(xml)
    path = File.expand_path(config['path'])
    # Create the target directory; mkdir_p does nothing if it already exists
    FileUtils.mkdir_p(path)
    # A feed without a channel or without items simply has nothing to queue
    items = feed.dig('channel', 0, 'item') || []
  rescue StandardError => e
    # A broken feed or config entry should not stop the remaining podcasts
    puts "Failed to fetch #{name}: #{e.message}"
    next
  end
  count = 0
  # Loop through rss items
  items.each do |item|
    # Continue only if it has an enclosure element carrying a url
    enclosure = item['enclosure']
    item_url = enclosure && enclosure[0]['url']
    next unless item_url
    # Remove query parameters
    item_fname = File.basename(item_url.gsub(/\?.*/, ''))
    new_path = File.expand_path(item_fname, path)
    # Only queue episodes that are not already in the target directory
    # (File.exist? because File.exists? was removed in Ruby 3.2)
    next if File.exist?(new_path)
    count += 1
    download_queue << {
      :name => item_fname,
      :url => item_url,
      :path => new_path
    }
  end
  puts "Need to download #{count} episodes of #{name}"
end
workers = 5
puts "Starting worker thread with #{workers} workers for #{download_queue.length} files"
# Each worker takes items off download_queue until it is empty and reports
# its progress through stats_queue.
threads = Array.new(workers) do
  Thread.new do
    loop do
      begin
        # The non-blocking pop raises ThreadError once the queue is empty
        item = download_queue.pop(true)
      rescue ThreadError
        break
      end
      stats_queue << "Downloading #{item[:name]}"
      tmp_file = item[:path] + '.tmp'
      begin
        # Write to a temporary name first so an interrupted transfer is never
        # mistaken for a finished episode on the next run.
        File.open(tmp_file, 'wb') do |file|
          # URI.open is needed because Kernel#open stopped handling URLs in
          # Ruby 3.0, even with open-uri loaded.
          URI.open(item[:url], ssl_options) do |remote|
            # Stream the body instead of reading it all into one String
            IO.copy_stream(remote, file)
          end
        end
        FileUtils.mv(tmp_file, item[:path])
        size = (File.size(item[:path]) / 1024.0 / 1024.0).round(2)
        stats_queue << "Done #{item[:name]} (#{size}MB)"
      rescue StandardError => e
        # Do not leave a partial .tmp file behind, and say why it failed
        FileUtils.rm_f(tmp_file)
        stats_queue << "Failed to download #{item[:name]}: #{e.message}"
      end
    end
  end
end
# Printer thread: echo each progress message until a falsy sentinel arrives
stats_thread = Thread.new do
  while (message = stats_queue.pop)
    puts message
  end
end
# Wait for every download worker, then tell the printer thread to stop
threads.each(&:join)
stats_queue << false
stats_thread.join
puts 'Done.'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment