Skip to content

Instantly share code, notes, and snippets.

@spraints
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save spraints/e0624b85df60abdd8b5d to your computer and use it in GitHub Desktop.
Save spraints/e0624b85df60abdd8b5d to your computer and use it in GitHub Desktop.
Download some Adam and Burt Crime Fighting Adventures
#!/bin/bash -e
# Launcher for dl.rb. The -e flag makes the shell exit as soon as a command fails,
# so the downloader is not started if the bundle step fails.
# Set up gems under ./.bundle with reduced output (presumably a bare `bundle`
# defaults to `bundle install` — confirm for the Bundler version in use).
bundle --path .bundle --quiet
# Replace this shell with the Ruby downloader, forwarding all command-line arguments.
exec bundle exec ruby dl.rb "$@"
require "nokogiri"
require "open-uri"
require "typhoeus"
# Season key => archive.org "details" page that lists that season's files.
# The string form of each key (e.g. "s1") is matched against file names when
# picking out episodes and is also used to name the cached HTML page.
# Frozen so the table cannot be mutated by accident at runtime.
Urls = {
  :s1 => "https://archive.org/details/AdamAndBurtInTights",
  :s2 => "https://archive.org/details/AdamAndBurtInTights2",
  :s3 => "https://archive.org/details/AdamAndBurtInTights3",
}.freeze
# Top-level driver: hands every listed episode to the downloader and then
# starts the transfers.
class App
  # Queues each episode, then runs the downloader.
  # Returns whatever the downloader's #run returns.
  def run
    episodes.each { |item| downloader.queue_episode(item) }
    downloader.run
  end

  private

  # Episode lister over the season URL table, built on first use.
  def episodes
    return @episodes if @episodes

    @episodes = EpisodeLister.new(Urls)
  end

  # Downloader instance, built on first use and shared by all calls.
  def downloader
    return @downloader if @downloader

    @downloader = EpisodeDownloader.new
  end
end
# Enumerates the episodes found on a set of season listing pages.
#
# Each page is fetched once and cached in the working directory as
# "<season>.html"; later runs read the cached copy instead of the network.
class EpisodeLister
  include Enumerable

  # season_archives - Hash of season key => listing page URL.
  def initialize(season_archives)
    @season_archives = season_archives
  end

  # Yields one Episode per video link found, season by season.
  # Without a block, returns an Enumerator over the same episodes.
  def each(&block)
    return enum_for(:each) unless block_given?

    @season_archives.each do |season, url|
      each_episode(season.to_s, url, &block)
    end
  end

  private

  # Parses the listing page for +season+ and yields an Episode for every
  # ".mp4" link, paired with the ".jpg" link that has the same episode number
  # (nil when no such thumbnail link exists).
  def each_episode(season, url)
    html = Nokogiri::HTML(read(season, url))
    hrefs = absolute_hrefs(html, URI(url))
    video_hrefs = filter(season, hrefs, ".mp4")
    thumb_hrefs = filter(season, hrefs, ".jpg")
    video_hrefs.each do |episode_number, video_href|
      yield Episode.new(:season => season, :episode => episode_number, :video => video_href, :thumb => thumb_hrefs[episode_number])
    end
  end

  # Resolves every link on the page against +base+. Links that cannot be
  # parsed as URIs are skipped so one malformed href does not abort the run.
  def absolute_hrefs(html, base)
    html.css("a[href]").each_with_object([]) do |a, hrefs|
      begin
        hrefs << base.merge(a["href"]).to_s
      rescue URI::Error
        next
      end
    end
  end

  # Returns a Hash of lower-cased episode number ("e01") => href for the hrefs
  # that contain "<season>e<digits>" and end with +ending+ (case-insensitive).
  # When several hrefs share an episode number the last one wins.
  def filter(season, hrefs, ending)
    pattern = /#{Regexp.escape(season)}(e\d+).*#{Regexp.escape(ending)}\z/i
    hrefs.each_with_object({}) do |href, h|
      match = pattern.match(href)
      h[match[1].downcase] = href if match
    end
  end

  # Returns the listing page HTML, from the "<season>.html" cache when present,
  # otherwise from +url+ (and then writes the cache).
  def read(season, url)
    cache_path = "#{season}.html"
    return File.read(cache_path) if File.exist?(cache_path)

    # URI.open, not Kernel#open: since Ruby 3.0 Kernel#open no longer fetches URLs.
    URI.open(url, &:read).tap { |data| File.write(cache_path, data) }
  end
end
# Downloads episode videos and thumbnails through a Typhoeus hydra,
# reporting progress as data arrives.
#
# Each file is streamed to "<dest>.partial" and renamed to its final name
# only after a successful response, so an interrupted or failed transfer never
# leaves something that looks like a finished download.
class EpisodeDownloader
  # Queues the video and the thumbnail of +episode+ for download.
  def queue_episode(episode)
    queue_download(episode.video_source, episode.video_dest)
    queue_download(episode.thumb_source, episode.thumb_dest)
  end

  # Runs all queued requests. The final progress line is printed even when
  # the run raises.
  def run
    hydra.run
  ensure
    progress.all_done
  end

  private

  # Queues one download of +source+ into +dest+.
  # Nothing is queued when +source+ is nil (e.g. an episode without a
  # thumbnail link) or when +dest+ already exists.
  def queue_download(source, dest)
    return if source.nil?

    progress.queued dest
    if File.exist?(dest)
      progress.skipped dest
      return
    end

    request = Typhoeus::Request.new(source, :followlocation => true)
    tmpdest = "#{dest}.partial"
    file = nil # opened once response headers arrive

    request.on_headers do |response|
      progress.started dest, :total => response.headers["Content-Length"]
      ensure_dir(File.dirname(dest))
      file = File.open(tmpdest, "wb")
    end
    request.on_body do |chunk|
      progress.chunk dest, :size => chunk.bytesize
      file.write chunk
    end
    request.on_complete do |response|
      progress.finished dest
      unless finalize_download(response, file, tmpdest, dest)
        warn "Download failed (HTTP #{response.code}): #{source}"
      end
    end
    hydra.queue request
  end

  # Closes +file+ (if it was ever opened) and, on a successful response, moves
  # the partial file into place. On failure the partial file is removed.
  # Returns true when +dest+ was written, false otherwise.
  def finalize_download(response, file, tmpdest, dest)
    file.close if file && !file.closed?
    if file && response.success?
      File.rename tmpdest, dest
      true
    else
      File.delete(tmpdest) if File.exist?(tmpdest)
      false
    end
  end

  def progress
    @progress ||= ProgressReporter.new
  end

  def hydra
    @hydra ||= Typhoeus::Hydra.new(:max_concurrency => concurrency)
  end

  # Number of simultaneous transfers: the CONCURRENCY environment variable,
  # or 2 when it is not set.
  def concurrency
    @concurrency ||= (ENV["CONCURRENCY"] || 2).to_i
  end

  # Creates +dirname+ and any missing parents, remembering directories already
  # known to exist so the filesystem is not re-checked for each download.
  def ensure_dir(dirname)
    @dirs ||= {"." => true}
    return if @dirs[dirname] ||= Dir.exist?(dirname)
    ensure_dir(File.dirname(dirname))
    Dir.mkdir(dirname)
    @dirs[dirname] = true
  end
end
# Renders download progress as a single status line that is rewritten in
# place: "[done/total]" followed by one "[episode read/total]" entry per
# transfer currently in flight.
class ProgressReporter
  def initialize
    @work = {}     # dest path => ProgressItem for everything queued
    @finished = 0  # downloads that are over (skipped or completed)
    @working = []  # dest paths currently transferring
  end

  # Registers +dest+ as a unit of work.
  def queued(dest)
    @work[dest] = ProgressItem.new(dest)
    write_progress_line
  end

  # Counts +dest+ as done without a transfer having taken place.
  def skipped(dest)
    @finished += 1
    write_progress_line
  end

  # Marks +dest+ as transferring. options[:total] is the expected size in
  # bytes (anything responding to #to_i; nil counts as 0).
  def started(dest, options)
    @work[dest].total_bytes = options.fetch(:total).to_i
    @working << dest
    write_progress_line
  end

  # Adds options[:size] bytes to the amount received for +dest+.
  def chunk(dest, options)
    @work[dest].read_bytes += options.fetch(:size)
    write_progress_line
  end

  # Marks the transfer of +dest+ as over and counts it as done, so the
  # "[done/total]" counter covers completed downloads as well as skipped ones.
  def finished(dest)
    @working.delete dest
    @finished += 1
    write_progress_line
  end

  # Prints the final status line followed by a newline.
  def all_done
    puts "#{progress_line}\n"
  end

  private

  # Rewrites the current terminal line (carriage return, no newline).
  def write_progress_line
    print "#{progress_line}\r"
  end

  # Builds the status line, padded to the terminal width so leftovers of a
  # longer previous line are overwritten.
  def progress_line
    line = "[#{@finished}/#{@work.size}]"
    @working.each { |dest| line << " [#{@work[dest]}]" }
    line.ljust(terminal_width)
  end

  # Terminal width in columns as reported by `tput cols`; 0 (no padding)
  # when the tput executable cannot be run.
  def terminal_width
    `tput cols`.to_i
  rescue SystemCallError
    0
  end

  # Byte counters for one destination file.
  class ProgressItem
    def initialize(dest_path)
      @dest_path = dest_path
    end

    attr_reader :dest_path

    def to_s
      "#{episode_number} #{read_bytes}/#{total_bytes}"
    end

    # Leading part of the file name, up to the first "-".
    def episode_number
      @episode_number ||= File.basename(dest_path).split("-").first
    end

    def read_bytes
      @read_bytes ||= SizeT.new
    end

    def read_bytes=(v)
      read_bytes.assign v
    end

    def total_bytes
      @total_bytes ||= SizeT.new
    end

    def total_bytes=(v)
      total_bytes.assign v
    end
  end

  # A byte count that prints itself in a short human-readable form.
  class SizeT
    def initialize(value = 0)
      assign value
    end

    attr_accessor :value

    # Accepts an Integer or another SizeT; raises RuntimeError for anything else.
    def assign(value)
      case value
      when Integer # Fixnum was removed in Ruby 3.2; Integer matches on every version
        self.value = value
      when SizeT
        self.value = value.value
      else
        raise "Unsupported: can not assign #{value.class.name} to #{self.class.name}#value"
      end
    end

    # Returns a new SizeT; +other+ may be anything responding to #to_i.
    def +(other)
      self.class.new(value.to_i + other.to_i)
    end

    def to_i
      value.to_i
    end

    # Plain number below 1024 bytes, otherwise scaled with a unit suffix.
    def to_s
      bytes = value.to_i
      if bytes < 1024
        bytes.to_s
      else
        reduce(bytes / 1024.0)
      end
    end

    # Formats +x+ (expressed in the unit of the first suffix) with about three
    # significant digits, moving to the next larger unit once x reaches 1000.
    # The largest suffix is used for anything beyond it.
    def reduce(x, suffixes = %w(K M G T P))
      suffix, *rest = suffixes
      if x < 10
        "%.2f%s" % [x, suffix]
      elsif x < 100
        "%.1f%s" % [x, suffix]
      elsif x < 1000 || rest.empty?
        "%d%s" % [x, suffix]
      else
        reduce(x / 1024, rest)
      end
    end
  end
end
# One downloadable episode: where its video and thumbnail come from and the
# local paths they are saved under.
class Episode
  # data - Hash with the keys :season, :episode, :video and :thumb.
  # Raises KeyError when any of them is missing.
  def initialize(data)
    @season, @episode, @video, @thumb =
      [:season, :episode, :video, :thumb].map { |key| data.fetch(key) }
  end

  # URL of the video file.
  def video_source
    @video
  end

  # URL of the thumbnail image.
  def thumb_source
    @thumb
  end

  # Local path for the video file.
  def video_dest
    dest_for("mp4")
  end

  # Local path for the thumbnail image.
  def thumb_dest
    dest_for("jpg")
  end

  private

  # "<season>/<season><episode>-<desc>.<suffix>"
  def dest_for(suffix)
    file_name = "#{@season}#{@episode}-#{desc}.#{suffix}"
    "#{@season}/#{file_name}"
  end

  # Third "-"-separated piece of the video file name, cut at its first ".".
  def desc
    @desc ||= begin
      third_piece = File.basename(@video).split("-")[2]
      third_piece.split(".")[0]
    end
  end
end
# Script entry point: runs unconditionally when this file is loaded, queueing
# every listed episode and then downloading them.
App.new.run
# Gem dependencies of the downloader script.
source "https://rubygems.org"
# Parses the season listing pages (Nokogiri::HTML and CSS selection of links).
gem "nokogiri"
# Provides the Typhoeus::Request / Typhoeus::Hydra used for the downloads.
gem "typhoeus", ">= 0.6.7"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment