Skip to content

Instantly share code, notes, and snippets.

@kaisershahid
Created May 16, 2018 18:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaisershahid/d4f63fca23a57e9b8c046992492a4533 to your computer and use it in GitHub Desktop.
Save kaisershahid/d4f63fca23a57e9b8c046992492a4533 to your computer and use it in GitHub Desktop.
Use this if you have a webpage that you want to grab multiple resources from.
#!/usr/bin/ruby
# Simple downloader: pass a URL and one or more patterns of resources to fetch. It can also just list the page's resources.
require 'nokogiri'
require 'open-uri'
require 'uri'
# Writes the command-line synopsis, followed by the list of supported
# actions, to standard output.
#
# @return [nil]
def print_usage
  usage_lines = [
    "Usage: fetch_things.rb [action=...] [pattern=... pattern=...] [save_to=~/Downloads] URL",
    "Actions: view, fetch"
  ]
  # A single puts of the joined text emits each entry on its own line.
  puts usage_lines.join("\n")
end
# Downloads the page at +url+ and collects the targets of its links and images.
#
# @param url [String] address of the HTML page to scan
# @return [Hash{String => String, nil}] resource URL (the +href+ of every <a>,
#   the +src+ of every <img>) mapped to a title: the link text for anchors,
#   the +title+ attribute (nil when absent) for images. Elements without a
#   usable +href+/+src+ are left out; a URL that occurs more than once keeps
#   the title of its last occurrence.
def fetch_url(url)
  # URI.open instead of Kernel#open: open-uri no longer patches Kernel#open on
  # Ruby 3.0+, and Kernel#open would run a "|command" argument as a process.
  # The block form closes the stream as soon as the document is parsed.
  html = URI.open(url) { |io| Nokogiri::HTML(io) }

  html.css('a, img').each_with_object({}) do |res, resources|
    anchor = res.name == 'a'
    # Node#[] returns the attribute value as a plain String (nil when the
    # attribute is missing), so hash keys compare and de-duplicate by value.
    link = anchor ? res['href'] : res['src']
    next if link.nil? || link.strip.empty?

    resources[link] = anchor ? res.text : res['title']
  end
end
# Default settings; every key=value argument overrides or extends them.
opts = {:url=>nil, :action=>'view', :save_to=>'~/Downloads'}
# First argument that is not a key=value option (the URL or a help flag).
last = nil
ARGV.each do |arg|
  # Only "word=value" counts as an option. Testing for a bare "=" would also
  # swallow URLs that carry a query string, e.g. http://host/page?id=1.
  option = arg.match(/\A(\w+)=(.*)\z/m)
  if option
    key = option[1].to_sym
    value = option[2]
    if key == :pattern
      begin
        # pattern= may be repeated; each occurrence is compiled and collected.
        (opts[:pattern] ||= []) << Regexp.new(value)
      rescue RegexpError => e
        # Report a bad regexp as a usage error instead of a stack trace.
        abort "X invalid pattern #{value.inspect}: #{e.message}"
      end
    else
      opts[key] = value
    end
  elsif !last
    last = arg
  else
    # Only one positional argument is used; later ones are reported and dropped.
    puts "? ignoring: #{arg}"
  end
end
# Interpret the positional argument: either a request for help or the URL.
if last
  # Help must be an exact flag. A substring search for "-h" also fires on
  # ordinary URLs such as http://my-host.example/ and makes them unusable.
  if %w[-h -help --help].include?(last)
    print_usage
    exit
  else
    opts[:url] = last
  end
end
# Without a URL (positional or given as url=...) there is nothing to do.
if opts[:url].nil?
  puts "X Specify a URL: fetch_things.rb URL", '*****' * 8
  print_usage
  # Non-zero status so calling scripts can detect the failure.
  exit 1
end
# Scan the page, then either list its resources ("view") or download the ones
# whose URL matches at least one pattern ("fetch").
resources = fetch_url(opts[:url])
case opts[:action]
when 'view'
  resources.each_key { |url| puts "- #{url}" }
when 'fetch'
  unless opts[:pattern]
    puts "X need to specify at least one pattern"
    print_usage
    exit 1
  end
  # A single pattern may be stored bare; normalise to a list.
  patterns = Array(opts[:pattern])
  # Expand "~" ourselves: no shell is involved in the download below.
  save_dir = File.expand_path(opts[:save_to])
  puts "> saving to #{save_dir}"
  resources.each_key do |url|
    target = url.to_s
    next if target.empty?

    if patterns.any? { |pat| pat.match(target) }
      # With dry=... set, matching resources are neither reported nor fetched.
      next if opts[:dry]

      # Decode the last path segment, then keep only its base name so that an
      # encoded "../" in a scraped URL cannot point outside save_dir.
      name = File.basename(URI::DEFAULT_PARSER.unescape(target.split('/').last.to_s))
      if name.empty? || name == '.' || name == '..'
        puts "? no file name in #{target}, skipping"
        next
      end

      # Links are often relative to the page; curl needs an absolute URL.
      # Fall back to the raw value when it cannot be parsed as a URI.
      source = begin
        URI.join(opts[:url], target).to_s
      rescue URI::Error
        target
      end

      puts "! #{source} > #{name}"
      # Argument-list form of system: nothing passes through a shell, so
      # quotes, ";" or "$(...)" inside a scraped URL cannot execute commands.
      downloaded = system('curl', '-o', File.join(save_dir, name), '--url', source)
      puts "X download failed: #{source}" unless downloaded
    elsif opts[:debug]
      puts "_ #{url}"
    end
  end
else
  print_usage
end
@kaisershahid
Copy link
Author

kaisershahid commented May 16, 2018

want ocarina of time OST? no problem! fetch_things.rb http://www.mariomayhem.com/downloads/sound_tracks/zelda_ocarina_of_time_ost.php action=fetch pattern=mp3 save_to=~/Downloads

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment