Download lolcats from your soup.io while it's up! Requires Ruby (written against MRI - "the regular Ruby" - 1.9.3; it should also work with 1.9.2 or 2.0.0, and 1.8.7 is a "maybe") and the Nokogiri gem (http://nokogiri.org).
require 'fileutils'
require 'open-uri'
require 'yaml'
require 'rubygems'
require 'nokogiri'
# Archives a soup.io RSS export: writes one YAML file per feed item into
# items/ and downloads every enclosure of each item into assets/.
#
# The feed path may be passed as the first command-line argument; without one
# the script falls back to the export file name it was written for.
# Enclosure files that already exist with a non-zero size are skipped, so the
# script can simply be re-run to retry failed downloads.

# Opens a remote URL through open-uri and yields the response IO to the block.
#
# Kernel#open no longer handles URLs on Ruby 3.0+; URI.open is the replacement
# wherever it is public (Ruby 2.5+). On older rubies URI.respond_to?(:open) is
# false (only the private Kernel#open exists), so the open-uri hook is used.
def open_remote(url, &block)
  if URI.respond_to?(:open)
    URI.open(url, &block)
  else
    open(url, &block)
  end
end

feed_path = ARGV[0] || '3fc1319c496ea1aacde451cecbdc17e3.rss'

# Both output directories must exist before anything is written into them;
# without assets/ every download would fail with ENOENT.
FileUtils.mkdir_p 'items'
FileUtils.mkdir_p 'assets'

# Block form closes the feed file as soon as it has been parsed.
feed = File.open(feed_path) { |rss| Nokogiri::XML(rss) }
items = feed.xpath('/rss/channel/item')
n = items.length
errors = 0

begin
  items.each_with_index do |item, i|
    # Strip the URN prefix so the remainder can be used in file names.
    guid = item.xpath('guid').text.sub(/\Aurn:www-soup-io:1:/, '')
    puts "* #{guid} #{i}/#{n} #{ '%.2f' % ((i.to_f/n)*100) }%"

    File.open("items/#{guid}.yml", 'w') do |ymlf|
      ymlf.puts YAML.dump(
        'title'       => item.xpath('title').text,
        'link'        => item.xpath('link').text,
        'date'        => item.xpath('pubDate').text,
        'description' => item.xpath('description').text
      )
    end

    item.xpath('enclosure').each do |encl|
      url = encl['url']
      # An enclosure without a url attribute cannot be fetched; skip it
      # instead of letting a NoMethodError abort the whole run.
      if url.nil? || url.empty?
        puts "  Skipped: enclosure without url"
        next
      end

      file = "assets/#{guid}-#{url.split('/').last}"
      if File.size?(file)
        puts "  Exists: #{file}"
        next
      end

      puts "  Download: #{url} => #{file}"
      begin
        open_remote(url) do |img|
          File.open(file, 'wb') { |imgf| imgf.write(img.read) }
        end
      rescue => e
        puts "  `->Error: #{e}, let's try again later"
        # Remove any partial file so the next run retries this download.
        FileUtils.rm_f file
        errors += 1
      end
    end
  end
ensure
  # Report the error count even when the run is interrupted or aborts.
  puts "** #{errors} errors **"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment