Created
May 16, 2018 18:52
-
-
Save kaisershahid/d4f63fca23a57e9b8c046992492a4533 to your computer and use it in GitHub Desktop.
Use this script when you have a webpage you want to grab multiple resources from.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
# Simple downloader: pass a URL and one or more patterns of resources to fetch. Can also just list the page's resources.
require 'nokogiri' | |
require 'open-uri' | |
require 'uri' | |
# Writes the command-line synopsis, followed by the list of supported
# actions, to standard output. Returns nil.
def print_usage
  puts <<~USAGE
    Usage: fetch_things.rb [action=...] [pattern=... pattern=...] [save_to=~/Downloads] URL
    Actions: view, fetch
  USAGE
end
# Downloads the HTML page at +url+ and collects every link and image it
# references.
#
# @param url [String] address of the page to scan
# @return [Hash{String => String, nil}] maps each +href+ (for <a>) or +src+
#   (for <img>) value, exactly as written in the page, to a label: the anchor
#   text, or the image's +title+ attribute (nil when the image has none).
#   A later element with the same address overwrites the earlier label.
#   Elements without an href/src value are skipped.
# @raise whatever open-uri raises when the page cannot be retrieved
def fetch_url(url)
  # URI.open (from open-uri) rather than Kernel#open: Kernel#open no longer
  # handles URLs on Ruby 3.0+, and it runs a subprocess when given input that
  # starts with "|". The block form closes the response once it is parsed.
  html = URI.open(url) { |io| Nokogiri::HTML(io) }

  html.css('a, img').each_with_object({}) do |res, resources|
    # Node#[] yields plain strings (or nil) instead of attribute objects.
    link, title = res.name == 'a' ? [res['href'], res.text] : [res['src'], res['title']]
    next if link.nil? || link.empty?

    resources[link] = title
  end
end
# Splits the command line into key=value options and one positional argument.
#
# An argument counts as an option only when it starts with a plain word
# followed by "=" (action=..., pattern=..., save_to=..., dry=..., debug=...).
# Anything else, including a URL whose query string contains "=", is taken as
# the positional argument; extra positional arguments are reported and dropped.
#
# @param argv [Array<String>] raw command-line arguments
# @return [Array(Hash, String)] the options hash and the positional argument
#   (nil when none was given). Repeated pattern= options accumulate into an
#   array of Regexp under :pattern; every other key keeps its last value.
# @raise [RegexpError] when a pattern= value is not a valid regular expression
def parse_args(argv)
  opts = { url: nil, action: 'view', save_to: '~/Downloads' }
  last = nil

  argv.each do |arg|
    option = /\A(\w+)=(.*)\z/m.match(arg)
    if option
      key = option[1].to_sym
      value = option[2]
      if key == :pattern
        (opts[:pattern] ||= []) << Regexp.new(value)
      else
        opts[key] = value
      end
    elsif last.nil?
      last = arg
    else
      puts "? ignoring: #{arg}"
    end
  end

  [opts, last]
end

opts, last = parse_args(ARGV)
# Interpret the positional argument: a help flag prints the usage text and
# stops; anything else is the URL of the page to scan.
if last
  # Match help flags exactly. A substring test would treat any URL that
  # merely contains "-h" (e.g. http://my-host.example/) as a help request.
  if %w[-h -help --help].include?(last)
    print_usage
    exit
  end
  opts[:url] = last
end

# Without a URL there is nothing to do: explain and exit with a failure status.
if opts[:url].nil?
  puts "X Specify a URL: fetch_things.rb URL", '*****' * 8
  print_usage
  exit 1
end
# Scan the page, then either list what was found (action=view) or download
# every resource whose address matches one of the patterns (action=fetch).
# Any other action prints the usage text.
resources = fetch_url(opts[:url])

case opts[:action]
when 'view'
  resources.each_key { |url| puts "- #{url}" }
when 'fetch'
  # Accepts nil, a single Regexp, or an array of them.
  patterns = Array(opts[:pattern])
  if patterns.empty?
    puts "X need to specify at least one pattern"
    print_usage
    exit 1
  end

  # Expand "~" here: curl is started without a shell, so nothing else would.
  save_dir = File.expand_path(opts[:save_to])
  puts "> saving to #{save_dir}"

  resources.each_key do |url|
    link = url.to_s
    next if link.empty?

    unless patterns.any? { |pat| pat.match(link) }
      puts "_ #{link}" if opts[:debug]
      next
    end
    # dry=... only reports nothing and downloads nothing for matches.
    next if opts[:dry]

    # The file name is the last path segment, percent-decoded. File.basename
    # strips any directory part the decoding may have revealed (e.g. "%2F"),
    # so a crafted link cannot write outside save_dir.
    decoded = URI::DEFAULT_PARSER.unescape(link.split('/').last.to_s)
    name = File.basename(decoded)
    if name.empty? || name == '.' || name == '..'
      puts "? no usable file name: #{link}"
      next
    end

    puts "! #{link} > #{name}"
    # Argument-list form of system: no shell is involved, so quotes, "$(...)"
    # or backticks inside a page-supplied URL cannot run commands. "--" stops
    # curl from reading a URL that starts with "-" as an option.
    downloaded = system('curl', '-o', File.join(save_dir, name), '--', link)
    puts "X download failed: #{link}" unless downloaded
  end
else
  print_usage
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Want the Ocarina of Time OST? No problem!
fetch_things.rb http://www.mariomayhem.com/downloads/sound_tracks/zelda_ocarina_of_time_ost.php action=fetch pattern=mp3 save_to=~/Downloads