Skip to content

Instantly share code, notes, and snippets.

@rkachowski
Last active September 2, 2015 13:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rkachowski/236c554f5f6defa4e355 to your computer and use it in GitHub Desktop.
Save rkachowski/236c554f5f6defa4e355 to your computer and use it in GitHub Desktop.
mass downloadin
require 'oga'
require 'open-uri'
require 'uri'
require 'rest-client'
require 'pry'
require 'thread'
# Global mutex that serializes console output from the concurrent download threads.
$log_lock = Mutex.new
# Scrapes the links from the page given as the first command-line argument
# and stores the resulting URLs in a local file named "urls".
#
# The first anchor on the page is skipped (presumably the parent-directory
# link of an index listing -- confirm against the target server). Every
# remaining href is appended to the page URL and percent-escaped. The list is
# written as Array#inspect text, which is the format the reader of "urls"
# in this file expects.
#
# @raise [ArgumentError] when no page URL was passed on the command line
# @return [File] the already-closed "urls" file handle (callers ignore it)
def get_urls
  page_url = ARGV[0]
  raise ArgumentError, "usage: #{$PROGRAM_NAME} PAGE_URL" if page_url.nil?

  page = Oga.parse_xml(RestClient.get(page_url))
  hrefs = page.xpath("//a").map { |anchor| anchor.get("href") }

  # Drop the first anchor, then any anchor without an href attribute: a nil
  # href would make the string concatenation below raise TypeError.
  hrefs = hrefs.drop(1).compact

  # URI.encode was removed in Ruby 3.0; the default parser's #escape performs
  # the same RFC 2396 escaping and is available on old and new Rubies alike.
  linkurls = hrefs.map { |href| URI::DEFAULT_PARSER.escape(page_url + href) }

  File.open("urls", "w") { |f| f << linkurls.inspect }
end
# Downloads every URL listed in the local "urls" file, one thread per URL.
#
# Builds the "urls" file first via get_urls when it does not exist yet, then
# blocks until all download threads have finished.
#
# @return [Array<Thread>] the joined download threads
def download_files
  # File.exists? was deprecated and then removed in Ruby 3.2; File.exist? is
  # the spelling that works on every Ruby version.
  get_urls unless File.exist?("urls")

  # SECURITY: the "urls" file is loaded with eval, so whatever it contains is
  # executed as Ruby code. Only run this against a "urls" file you trust.
  urls = File.open("urls") { |f| eval(f.read) }

  threads = urls.map { |url| create_download_thread(url) }
  threads.each(&:join)
end
# Starts a background thread that downloads one URL into the current directory.
#
# The file is saved under the URL's last path segment, percent-decoded.
# Console output is wrapped in $log_lock so lines from concurrent threads do
# not interleave.
#
# @param download [String] URL to fetch
# @return [Thread] the started thread; join it to wait for the download
def create_download_thread(download)
  Thread.new do
    $log_lock.synchronize { puts "downloading #{download}..." }

    RestClient.get(download) do |response, _request, _result|
      # When a block is given, rest-client passes every response to it instead
      # of raising on HTTP errors, so the status has to be checked here to
      # avoid saving an error page as if it were the file.
      unless (200..299).cover?(response.code)
        $log_lock.synchronize { puts "failed #{download} (HTTP #{response.code})" }
        next
      end

      # URI.decode was removed in Ruby 3.0; the default parser's #unescape is
      # the equivalent. basename is applied again after decoding so an encoded
      # slash ("%2F") cannot turn the name into a path outside this directory.
      decoded = URI::DEFAULT_PARSER.unescape(File.basename(download))
      name = File.basename(decoded)

      # Binary mode keeps non-text payloads intact on every platform.
      File.open(name, "wb") { |f| f.write(response.body) }
      $log_lock.synchronize { puts "downloaded #{download} ! " }
    end
  end
end
# Script entry point: run the download as soon as the file is executed.
download_files
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment