Last active
August 29, 2015 14:01
-
-
Save spraints/e0624b85df60abdd8b5d to your computer and use it in GitHub Desktop.
Download some Adam and Burt Crime Fighting Adventures
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Installs the gems listed in the Gemfile into ./.bundle, then runs dl.rb
# with whatever arguments were passed to this script.

# Abort on the first failing command. Set here rather than on the shebang line
# so it still applies when the script is started as `bash <script>`.
set -e

# NOTE(review): newer Bundler releases report `--path` as deprecated in favour
# of `bundle config set --local path .bundle` -- confirm against the Bundler
# version in use before switching.
bundle install --path .bundle --quiet

# Replace this shell with the Ruby process so its exit status is ours.
exec bundle exec ruby dl.rb "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "io/console"
require "open-uri"

require "nokogiri"
require "typhoeus"
# Season key => archive.org details page that links to that season's files.
# The key (as a string) is also used as the season label in the cache file
# name and in the download paths.
# Frozen so the shared constant cannot be mutated by accident.
Urls = {
  :s1 => "https://archive.org/details/AdamAndBurtInTights",
  :s2 => "https://archive.org/details/AdamAndBurtInTights2",
  :s3 => "https://archive.org/details/AdamAndBurtInTights3",
}.freeze
# Entry point: hands every listed episode to the downloader, then runs it.
class App
  # Queues each episode yielded by the lister and then starts the downloads.
  def run
    episodes.each { |episode| downloader.queue_episode(episode) }
    downloader.run
  end

  private

  # Lazily built lister over the season pages in Urls.
  def episodes
    @episodes ||= EpisodeLister.new(Urls)
  end

  # Lazily built downloader shared by every queued episode.
  def downloader
    @downloader ||= EpisodeDownloader.new
  end
end
# Enumerates the episodes linked from each season's archive page.
class EpisodeLister
  include Enumerable

  # season_archives - Hash of season key => URL of that season's archive page.
  def initialize(season_archives)
    @season_archives = season_archives
  end

  # Yields one Episode per video link found on each season page.
  # Returns an Enumerator when called without a block.
  def each(&block)
    return enum_for(:each) unless block_given?

    @season_archives.each do |season, url|
      each_episode(season.to_s, url, &block)
    end
  end

  private

  # Parses the season page and yields an Episode for every ".mp4" link, paired
  # with the ".jpg" link that has the same episode number (nil when the page
  # has no such thumbnail).
  def each_episode(season, url)
    html = Nokogiri::HTML(read(season, url))
    hrefs = absolute_hrefs(html, URI(url))
    video_hrefs = filter(season, hrefs, ".mp4")
    thumb_hrefs = filter(season, hrefs, ".jpg")
    video_hrefs.each do |episode_number, video_href|
      yield Episode.new(
        :season => season,
        :episode => episode_number,
        :video => video_href,
        :thumb => thumb_hrefs[episode_number]
      )
    end
  end

  # Resolves every link on the page against the page URL. Links that are not
  # valid URIs are skipped instead of aborting the whole listing.
  def absolute_hrefs(html, base)
    resolved = html.css("a[href]").map do |a|
      begin
        base.merge(a["href"]).to_s
      rescue URI::Error
        nil
      end
    end
    resolved.compact
  end

  # Returns a Hash of lower-cased episode number ("e01") => href for the hrefs
  # that contain "<season>e<digits>" and end with +ending+ (case-insensitive).
  # When several hrefs share an episode number, the last one wins.
  def filter(season, hrefs, ending)
    # Both interpolated parts are escaped so they only ever match literally.
    pattern = /#{Regexp.escape(season)}(e\d+).*#{Regexp.escape(ending)}\z/i
    hrefs.each_with_object({}) do |href, found|
      match = pattern.match(href)
      found[match[1].downcase] = href if match
    end
  end

  # Returns the HTML of the season page, using "<season>.html" in the current
  # directory as a cache: it is read when present and written after a fetch.
  def read(season, url)
    cache_path = "#{season}.html"
    return File.read(cache_path) if File.exist?(cache_path)

    # Fetch through open-uri's URI#read rather than Kernel#open, which no
    # longer accepts URLs on current Ruby versions.
    URI.parse(url).read.tap { |data| File.write(cache_path, data) }
  end
end
# Downloads episode files through a Typhoeus::Hydra, streaming each response
# body into "<dest>.partial" and moving it to <dest> once the request succeeds.
class EpisodeDownloader
  # Queues the video and the thumbnail of +episode+ for download.
  def queue_episode(episode)
    queue_download(episode.video_source, episode.video_dest)
    queue_download(episode.thumb_source, episode.thumb_dest)
  end

  # Runs every queued request. The final progress line is printed even when
  # the run is interrupted by an exception.
  def run
    hydra.run
  ensure
    progress.all_done
  end

  private

  # Queues one request that streams +source+ to +dest+.
  # Does nothing when there is no source URL; reports the item as skipped when
  # +dest+ already exists.
  def queue_download(source, dest)
    return if source.nil?

    progress.queued dest
    if File.exist?(dest)
      progress.skipped dest
      return
    end

    request = Typhoeus::Request.new(source, :followlocation => true)
    tmpdest = "#{dest}.partial"
    file = nil

    request.on_headers do |response|
      progress.started dest, :total => response.headers["Content-Length"]
      ensure_dir(File.dirname(dest))
      # Defensive: never leak a handle should this callback fire more than once.
      file.close if file && !file.closed?
      file = File.open(tmpdest, "wb")
    end

    request.on_body do |chunk|
      progress.chunk dest, :size => chunk.bytesize
      file.write chunk
    end

    request.on_complete do |response|
      progress.finished dest
      # `file` is still nil when the request failed before any headers arrived.
      if file
        file.close unless file.closed?
        if response.success?
          File.rename tmpdest, dest
        else
          # Never promote an error page or a truncated body to the final name.
          File.delete tmpdest
        end
      end
      warn "\nDownload failed (HTTP #{response.code}): #{source}" unless response.success?
    end

    hydra.queue request
  end

  def progress
    @progress ||= ProgressReporter.new
  end

  def hydra
    @hydra ||= Typhoeus::Hydra.new(:max_concurrency => concurrency)
  end

  # Number of parallel downloads: the CONCURRENCY environment variable, or 2.
  # Values that do not parse to a positive integer fall back to 1.
  def concurrency
    @concurrency ||= [(ENV["CONCURRENCY"] || 2).to_i, 1].max
  end

  # Creates +dirname+ and any missing parents, remembering what already exists
  # so each directory is checked on disk at most once.
  def ensure_dir(dirname)
    @dirs ||= { "." => true }
    return if @dirs[dirname] ||= Dir.exist?(dirname)

    ensure_dir(File.dirname(dirname))
    Dir.mkdir(dirname)
    @dirs[dirname] = true
  end
end
# Prints a single, continually rewritten status line on standard output:
# "[finished/total]" followed by one "[episode read/total]" entry for every
# download currently in progress.
class ProgressReporter
  # Line width used when the terminal size cannot be determined.
  DEFAULT_WIDTH = 80

  def initialize
    @work = {}      # dest path => ProgressItem, one per queued download
    @finished = 0   # downloads that were skipped or have completed
    @working = []   # dest paths of downloads currently receiving data
  end

  # Registers +dest+ as a known item of work.
  def queued(dest)
    @work[dest] = ProgressItem.new(dest)
    write_progress_line
  end

  # Counts +dest+ as done without it ever having been started.
  def skipped(dest)
    @finished += 1
    write_progress_line
  end

  # Marks +dest+ as in progress. options[:total] is the expected size in bytes
  # (anything responding to #to_i; nil counts as 0).
  def started(dest, options)
    @work[dest].total_bytes = options.fetch(:total).to_i
    @working << dest unless @working.include?(dest)
    write_progress_line
  end

  # Adds options[:size] bytes to the amount received for +dest+.
  def chunk(dest, options)
    @work[dest].read_bytes += options.fetch(:size)
    write_progress_line
  end

  # Marks +dest+ as no longer in progress and counts it as done.
  def finished(dest)
    @working.delete dest
    @finished += 1
    write_progress_line
  end

  # Prints the final state of the progress line followed by a newline.
  def all_done
    puts "#{progress_line}\n"
  end

  private

  # Rewrites the current terminal line in place (carriage return, no newline).
  def write_progress_line
    print "#{progress_line}\r"
  end

  # The status text, padded to the terminal width so that leftovers of a
  # previously printed, longer line are overwritten.
  def progress_line
    line = "[#{@finished}/#{@work.size}]"
    @working.each { |dest| line << " [#{@work[dest]}]" }
    line.ljust(terminal_width)
  end

  # Terminal width in columns, queried without spawning a subprocess.
  # Falls back to DEFAULT_WIDTH when there is no usable console.
  def terminal_width
    console = IO.console
    columns = console ? console.winsize.last : 0
    columns > 0 ? columns : DEFAULT_WIDTH
  rescue SystemCallError, NotImplementedError
    DEFAULT_WIDTH
  end

  # Progress of a single download, printed as "<episode> <read>/<total>".
  class ProgressItem
    attr_reader :dest_path

    def initialize(dest_path)
      @dest_path = dest_path
    end

    def to_s
      "#{episode_number} #{read_bytes}/#{total_bytes}"
    end

    # The part of the destination file name before the first "-".
    def episode_number
      @episode_number ||= File.basename(dest_path).split("-").first
    end

    def read_bytes
      @read_bytes ||= SizeT.new
    end

    def read_bytes=(v)
      read_bytes.assign v
    end

    def total_bytes
      @total_bytes ||= SizeT.new
    end

    def total_bytes=(v)
      total_bytes.assign v
    end
  end

  # A byte count that prints itself with 1024-based unit suffixes.
  class SizeT
    attr_accessor :value

    def initialize(value = 0)
      assign value
    end

    # Stores an Integer or copies the value of another SizeT.
    # Raises RuntimeError for any other type.
    def assign(value)
      case value
      when Integer # also matches Fixnum/Bignum on Rubies that still have them
        self.value = value
      when SizeT
        self.value = value.value
      else
        raise "Unsupported: can not assign #{value.class.name} to #{self.class.name}#value"
      end
    end

    # Returns a new SizeT; +other+ may be anything responding to #to_i.
    def +(other)
      self.class.new(value.to_i + other.to_i)
    end

    def to_i
      value.to_i
    end

    # Plain digits below 1024 bytes, otherwise a scaled value with a suffix.
    def to_s
      bytes = value.to_i
      if bytes < 1024
        bytes.to_s
      else
        reduce(bytes / 1024.0)
      end
    end

    # Formats +x+, expressed in the unit of the first suffix, with about three
    # significant digits; moves on to the next suffix once x reaches 1000.
    # The largest suffix is used for everything beyond it.
    def reduce(x, suffixes = %w(K M G T P))
      suffix, *rest = suffixes
      if x < 10
        "%.2f%s" % [x, suffix]
      elsif x < 100
        "%.1f%s" % [x, suffix]
      elsif x < 1000 || rest.empty?
        "%d%s" % [x, suffix]
      else
        reduce(x / 1024, rest)
      end
    end
  end
end
# One episode: where its video and thumbnail come from and where they are
# saved ("<season>/<season><episode>-<description>.<ext>").
class Episode
  # data - Hash with the keys :season, :episode, :video and :thumb.
  #        Raises KeyError when one of them is missing.
  def initialize(data)
    @season = data.fetch(:season)
    @episode = data.fetch(:episode)
    @video = data.fetch(:video)
    @thumb = data.fetch(:thumb)
  end

  def video_source
    @video
  end

  def thumb_source
    @thumb
  end

  def video_dest
    dest_for "mp4"
  end

  def thumb_dest
    dest_for "jpg"
  end

  private

  def dest_for(suffix)
    "#{@season}/#{@season}#{@episode}-#{desc}.#{suffix}"
  end

  # Description taken from the video file name: the third "-"-separated part,
  # cut at its first ".". Falls back to the file name without its extension
  # when the name has no such part, instead of failing on nil.
  def desc
    @desc ||= begin
      file_name = File.basename(@video)
      slug = file_name.split("-")[2].to_s.split(".")[0]
      slug.nil? || slug.empty? ? File.basename(file_name, ".*") : slug
    end
  end
end
# Start downloading only when this file is executed directly, so that loading
# it (for example from irb or a test) does not trigger any downloads.
App.new.run if $PROGRAM_NAME == __FILE__
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Third-party gems required by dl.rb.
source "https://rubygems.org"

gem "nokogiri"
gem "typhoeus", ">= 0.6.7"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment