Skip to content

Instantly share code, notes, and snippets.

@jwilkins
Last active March 25, 2020 13:15
Show Gist options
  • Save jwilkins/5308913 to your computer and use it in GitHub Desktop.
Save jwilkins/5308913 to your computer and use it in GitHub Desktop.
parallel http/https download with typhoeus
#!/usr/bin/env ruby
# 25s curl -o all.gz.curl http://download.openwall.net/pub/passwords/wordlists/all.gz
# 11s for this script with 4 parallel requests
require 'typhoeus'
# Returns the raw header block of the final response in an exchange.
# When redirects are followed, the raw header string holds one block per hop,
# separated by blank lines; only the last block describes the real resource.
def pfetch_final_headers(response)
  response.response_headers.to_s.split(/\r?\n\r?\n/).last.to_s
end

# Splits the byte span 0..size-1 into `splits` contiguous, inclusive
# [first, last] pairs suitable for HTTP Range headers.
# The final pair absorbs the remainder of the integer division so that the
# ranges always cover every byte of the resource.
def pfetch_byte_ranges(size, splits)
  part_size = size / splits
  Array.new(splits) do |index|
    first = index * part_size
    last = index == splits - 1 ? size - 1 : first + part_size - 1
    [first, last]
  end
end

# Downloads +url+ with one plain GET request and writes the body to +path+.
def pfetch_single(url, path)
  File.open(path, 'wb') { |file| file << Typhoeus.get(url, followlocation: true).body }
end

# Downloads +url+ into the current directory, naming the file after the last
# path component of the URL.
#
# A HEAD request is issued first. When the server reports a Content-Length
# above 1,000,000 bytes and advertises "Accept-Ranges: bytes", the resource is
# fetched as +splits+ concurrent range requests whose bodies are written to
# temporary "<name>.part_NNN" files and then concatenated. In every other
# case (small file, unknown size, no range support, splits <= 1), or when any
# range request fails, the resource is fetched with a single GET instead.
#
# url    - String URL to download.
# splits - Integer number of parallel range requests (default 4).
#
# Returns the String name of the file that was written.
def pfetch(url, splits=4)
  basename = File.basename(url)
  headers = pfetch_final_headers(Typhoeus.head(url, followlocation: true))

  length = headers[/^Content-Length:[ \t]*(\d+)/i, 1]
  size = length.to_i if length
  accepts_bytes = headers[/^Accept-Ranges:[ \t]*(.*)$/i, 1].to_s.strip.downcase == 'bytes'

  # size may be unknown (nil) even when ranges are advertised, so it is
  # checked before being compared.
  parallelize = accepts_bytes && size && size > 1_000_000 && splits > 1

  unless parallelize
    pfetch_single(url, basename)
    return basename
  end

  ranges = pfetch_byte_ranges(size, splits)
  # basename is passed as an argument, never as the format string, so
  # percent-encoded names such as "my%20file.gz" are handled literally.
  part_names = Array.new(ranges.length) { |index| format('%s.part_%03d', basename, index) }
  failed = []

  hydra = Typhoeus::Hydra.new
  ranges.each_with_index do |(first, last), index|
    expected = last - first + 1
    request = Typhoeus::Request.new(url, followlocation: true,
                                         headers: { 'Range' => "bytes=#{first}-#{last}" })
    request.on_complete do |resp|
      body = resp.body.to_s
      # Only a 206 carrying exactly the requested byte count is a usable part;
      # a 200 would mean the server ignored the Range header and sent it all.
      if resp.code == 206 && body.bytesize == expected
        puts "#{index} done, #{expected} bytes in #{resp.time}"
        File.open(part_names[index], 'wb') { |file| file << body }
      else
        puts "#{index} returned #{resp.code}"
        failed << index
      end
    end
    hydra.queue(request)
  end

  begin
    hydra.run
    if failed.empty?
      # combine parts
      File.open(basename, 'wb') do |out|
        part_names.each { |name| IO.copy_stream(name, out) }
      end
    else
      puts "#{failed.length} part(s) failed, retrying as a single download"
      pfetch_single(url, basename)
    end
  ensure
    # Never leave temporary part files behind, whatever happened above.
    part_names.each { |name| File.delete(name) if File.exist?(name) }
  end

  basename
end
# Script entry point: download every URL given on the command line,
# each with four parallel range requests.
ARGV.each do |target|
  pfetch(target, 4)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment