Skip to content

Instantly share code, notes, and snippets.

@VanTanev
Last active December 22, 2015 01:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VanTanev/6398879 to your computer and use it in GitHub Desktop.
Save VanTanev/6398879 to your computer and use it in GitHub Desktop.
Grab some pony gifs
#!/usr/bin/env ruby
require "optparse"
require "ostruct"
require "nokogiri"
require "uri"
require "open-uri"
require "net/http"
# Command-line configuration.
#
# Every default is assigned before the parser is built because the help texts
# of --url, --selector and --bonus-selector interpolate the default values.
options = OpenStruct.new
options.target_dir = Dir.getwd
# Folder names used when gifs are sorted into per-season / bonus directories.
# :season is a format string that receives the season number.
options.separate_folders = {
  :season => "Season%02d",
  :bonus => "Bonus",
}
options.url = "http://www.reddit.com/r/mylittlepony/comments/1ki386/200_gifs_for_100_days_a_gif_retrospective_of_mlp/"
options.selector = ".expando a, .id-t1_cc09ygp > .entry .usertext-body a"
options.bonus_selector = ".id-t1_ccwgi0x > .entry .usertext-body a, .id-t1_ccwgiew > .entry .usertext-body a, .id-t1_cdi7vmy > .entry .usertext-body a"

OptionParser.new do |opts|
  opts.banner = "Usage: grab.rb [options] \n" +
                "Image grabber for the 200 gifs for 100 days MLP thread"

  opts.on("--target-dir DIRECTORY", "The output directory, defaults to the current directory") do |dir|
    # Relative paths are resolved against the current working directory.
    # Dir.exist? is used because Dir.exists? no longer exists in Ruby 3.2+.
    dir = File.expand_path(dir)
    fail "No such directory: #{dir}" unless Dir.exist?(dir)
    options.target_dir = dir
  end

  opts.on("-o", "--overwrite", "Overwrite existing files") do
    options.overwrite = true
  end

  opts.on("-n", "--no-separate-folders", "Do not create separate folders for the different seasons and the bonus gifs") do
    options.no_separate_folders = true
  end

  opts.on("--url URL", "Url to use to grab the images, defaults to '#{options.url}'") do |url|
    options.url = url
  end

  opts.on("--selector SELECTOR", "The link selector to use, defaults to '#{options.selector}'") do |selector|
    options.selector = selector
  end

  opts.on("--bonus-selector SELECTOR", "The link selector to use, defaults to '#{options.bonus_selector}'") do |bonus_selector|
    options.bonus_selector = bonus_selector
  end
end.parse!
# Collect every anchor matched by the regular and bonus selectors whose href
# ends in ".gif". Each entry keeps the link caption (:text) and target (:uri).
#
# URI.open is called explicitly: open-uri no longer hooks Kernel#open for URLs
# since Ruby 3.0, so a bare `open(url)` would look for a local file instead.
page = Nokogiri::HTML(URI.open(options.url))
links = page.css(options.selector, options.bonus_selector).each_with_object([]) do |link, found|
  href = link["href"]
  # Anchors without an href attribute return nil here and are skipped.
  found << { text: link.text, uri: href } if href && href.match(/\.gif\z/)
end
# Running index used to number gifs whose caption carries no SxxEyy episode tag.
$bonus_name_counter = 0

# Builds the output path (relative to the target directory) for one link.
#
# text    - link caption; surrounding whitespace is stripped, spaces become
#           dashes and every character that is not a word character or a dash
#           is removed.
# ext     - file extension including the leading dot (e.g. ".gif").
# options - object responding to #no_separate_folders and #separate_folders
#           (a hash with a :season format string and a :bonus folder name).
#
# Names that do not start with an episode tag (SxxEyy, case-insensitive) get a
# "Bonus-NNN-" prefix, and $bonus_name_counter is incremented for each of them.
#
# Returns the file name, prefixed with the season or bonus folder unless
# options.no_separate_folders is truthy.
def filename_from_text(text, ext, options)
  filename = text.strip.tr(" ", "-").gsub(/[^\w\-]/, "") + ext

  unless filename =~ /\AS\d{2}E\d{2}/i
    $bonus_name_counter += 1
    filename = format("Bonus-%03d-%s", $bonus_name_counter, filename)
  end

  return filename if options.no_separate_folders

  case filename
  when /\AS(\d{2})/i
    # Convert the capture explicitly. Handing the string itself to %d makes
    # Ruby parse it like Integer(), where a leading zero means octal, so "08"
    # and "09" would raise ArgumentError.
    season = Regexp.last_match(1).to_i
    File.join(options.separate_folders[:season] % season, filename)
  when /\ABonus/i
    File.join(options.separate_folders[:bonus], filename)
  else
    filename
  end
end
# Download every collected gif into the target directory, printing a progress
# line that is overwritten in place (it ends in "\r").
uri_parser = URI::Parser.new
links.each_with_index do |link, i|
  print "Downloading %03d of %03d \"%s\" \r" % [i + 1, links.size, link[:text]]
  uri = uri_parser.parse(link[:uri])

  # The name is computed for every link, including ones that end up skipped,
  # so the bonus numbering stays the same from run to run.
  filename = File.join(options.target_dir, filename_from_text(link[:text], File.extname(uri.path), options))
  folder = File.dirname(filename)
  # Dir.exist? is used because Dir.exists? no longer exists in Ruby 3.2+.
  Dir.mkdir(folder) unless Dir.exist?(folder)

  # Skip non-empty files that are already present, before opening a connection.
  next if File.size?(filename) && !options.overwrite

  # Keep the query string; some image hosts need it to serve the file.
  request_path = uri.query ? "#{uri.path}?#{uri.query}" : uri.path

  # Honour the port and scheme of the link so https and non-default ports work.
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
    http.request_get(request_path) do |response|
      # Only write successful responses; otherwise an error page would be
      # saved under a .gif name and treated as downloaded on the next run.
      unless response.is_a?(Net::HTTPSuccess)
        warn "\nSkipping #{link[:uri]}: HTTP #{response.code}"
        next
      end

      File.open(filename, "wb") do |f|
        response.read_body { |segment| f.write(segment) }
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment