#! /usr/bin/env ruby
# Downloads every Destroy All Software screencast for a paid account.
#
# usage:
#  $ das_download.rb email password [download_directory]
require 'mechanize'
# gem 'mechanize-progressbar'

email    = ARGV[0] or raise('Please provide the email address for your account')
password = ARGV[1] or raise('Please provide the password for your account')
path     = ARGV[2] || '.'

# Sign in once and reuse the authenticated session for every download.
# (The original built a fresh agent and re-authenticated per file.)
agent = Mechanize.new
agent.get 'https://www.destroyallsoftware.com/screencasts/users/sign_in'
form = agent.page.forms.first
form['user[email]']    = email
form['user[password]'] = password
form.submit

agent.get 'https://www.destroyallsoftware.com/screencasts/catalog'
screencasts = agent.page.search('li.screencast')

# From here on, responses are streamed straight to disk instead of being
# parsed in memory — required for the large .mov files.
agent.pluggable_parser.default = Mechanize::Download

# Pop from the end of the catalog list; `index` is the count remaining, so
# the last list entry gets the highest file number and numbers count down.
# NOTE(review): numbering therefore shifts when new screencasts are
# published — kept as-is for compatibility with already-downloaded files.
while screencast = screencasts.pop
  title = screencast.search('a').first.text
  url   = screencast.search('.download_link > a:first-child').first['href']
  index = screencasts.size
  # Sanitize the title: strip punctuation, turn slashes/spaces into dashes.
  slug  = title.gsub(/\.|:|,/, '').gsub(/\/|\s/, '-').downcase
  file  = File.join(path, "#{'%03d' % (index + 1)}-#{slug}.mov")
  puts "Downloading #{title} - #{index} to go"
  next puts 'Already Downloaded' if File.exist?(file)
  agent.get(url).save(file)
end
This comment has been minimized.
This comment has been minimized.
Alright! Thanks!
Excerpts from Wael M. Nasreddine's message of Mon Feb 27 17:54:21 -0600 2012:
… Hey @maca, really useful Gist, thank you
I've updated it a little bit. Basically, you're downloading the screencasts in reverse order: the first screencast ends up numbered 54 and the last (newest) one ends up numbered 1. That's a problem because the next time a screencast is released, your script will re-download all of them — the numbering will have shifted, since the first (oldest) one becomes 55 by then.
Check the gist here https://gist.github.com/1927927/25831a6c53734b90d627175147f332ce22089fdb
---
Reply to this email directly or view it on GitHub:
https://gist.github.com/1798070
|
This comment has been minimized.
This comment has been minimized.
No Problem, Thank you :) |
This comment has been minimized.
This comment has been minimized.
nice one |
This comment has been minimized.
This comment has been minimized.
I would actually not prefix the index to the file name at all. I tried running this script again and the index to screencast mapping changed. |
This comment has been minimized.
This comment has been minimized.
Actually, this is what worked for me: ... while screencast = screencasts.pop |
This comment has been minimized.
This comment has been minimized.
made some changes to suit my prefs and updated to match the new layout of DAS: https://gist.github.com/jasondew/5583811 |
This comment has been minimized.
This comment has been minimized.
i made a realllllllllllly hacky version of this. however, it works and downloads in parallel! #! /usr/bin/env ruby
# usage:
# $ das_download.rb email password [download_directory]
require 'mechanize'
# gem 'mechanize-progressbar'
email = ARGV[0] or raise('Please provide the email address for your account')
password = ARGV[1] or raise('Please provide the password for your account')
agent = Mechanize.new
agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.100 Safari/537.36'
agent.get 'https://www.destroyallsoftware.com/screencasts/users/sign_in'
form = agent.page.forms.first
form['user[email]'] = email
form['user[password]'] = password
form.submit
agent.pluggable_parser.default = Mechanize::Download
agent.get 'https://www.destroyallsoftware.com/screencasts/catalog'
episodes = agent.page.search('.episode > a').map { |a| a['href'] }
episodes_meta = episodes.map do |screencast, index|
page = agent.get 'https://www.destroyallsoftware.com' + screencast
# take a bath after this
url = page.search('video + script').first.children.first.to_s.match(/\s+source\.src\s=\s"(.+?)"/)[1]
title = URI(url).path[1..-1]
{ url: url, title: title }
end
puts "About to download #{episodes_meta.length} episodes... hold tight."
episodes_meta.each_slice(14) do |slice|
fork do
slice.each do |meta|
system('wget', '--quiet', '-O', meta[:title], meta[:url])
puts "Finished #{meta[:title]}"
end
end
end
Process.waitall |
This comment has been minimized.
This comment has been minimized.
@ianks Thanks a ton for this script! Note that you have to have |
This comment has been minimized.
This comment has been minimized.
I wrote a small script to grab all the compendium articles, with their images and css so you can read and review offline: https://gist.github.com/AlessandroMinali/fbb9532d5db1f568481bca1f9c2cb9f5 |
This comment has been minimized.
This comment has been minimized.
I've forked https://gist.github.com/jasondew/5583811 and updated it to work with the latest DAS which is free this week: https://gist.github.com/jaredculp/f26f83d214cf926472dddd4269bd2538 |
This comment has been minimized.
This comment has been minimized.
On season three and haven't hit any problems. Thanks for this! |
This comment has been minimized.
This comment has been minimized.
Hacked together another fork https://gist.github.com/itsgoingd/4e6f9b663a825143ebd6997806931e73
|
This comment has been minimized.
Hey @maca, really useful Gist, thank you
I've updated it a little bit. Basically, you're downloading the screencasts in reverse order: the first screencast ends up numbered 54 and the last (newest) one ends up numbered 1. That's a problem because the next time a screencast is released, your script will re-download all of them — the numbering will have shifted, since the first (oldest) one becomes 55 by then.
Check the gist here https://gist.github.com/1927927