Skip to content

Instantly share code, notes, and snippets.

@cjavdev
Forked from forresty/railscasts_download.rb
Last active January 3, 2016 04:59
Show Gist options
  • Save cjavdev/8412946 to your computer and use it in GitHub Desktop.
Save cjavdev/8412946 to your computer and use it in GitHub Desktop.
pages/*
episode_pages/*
videos/*
cookies.txt
#!/usr/bin/env ruby
require "nokogiri"
require "open-uri"
require "thread/pool"
# Shared worker pool (Thread.pool comes from the "thread/pool" library
# required above), created with an argument of 24. The download method
# submits its wget jobs to it through $pool.process.
$pool = Thread.pool(24)
# Queues a `wget -c` download of +url+ into +output_file+ on the shared $pool,
# unless +output_file+ already exists on disk.
#
# @param url [String] address handed to wget
# @param output_file [String] path handed to wget's -O option
# @return [Object, nil] whatever $pool.process returns, or nil when skipped
def download(url, output_file)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return if File.exist?(output_file)

  $pool.process do
    # Passing the arguments as a list bypasses the shell, so characters such
    # as "?", "&" or ";" in a scraped URL are never treated as shell syntax.
    system("wget", "-c", url, "--load-cookies=cookies.txt", "-O", output_file)
  end
end
# Queues a download for index pages 1 through +max+ of railscasts.com, saving
# page N as pages/pageN.html.
#
# @param max [Integer] number of the last index page to fetch (default 48)
# @return [Range] the range of page numbers that was iterated
def download_pages(max = 48)
  page_numbers = 1..max
  page_numbers.each do |page_number|
    source_url = "http://railscasts.com/?page=#{page_number}"
    destination = "pages/page#{page_number}.html"
    download(source_url, destination)
  end
end
# Collects the absolute episode URLs linked from every saved index page
# matching pages/*.html, printing each URL as it is found.
#
# @return [Array<String>] "http://railscasts.com" + href for every anchor
#   matched by the "div.episode h2 a" selector, in glob order
def extract_episode_links
  Dir["pages/*.html"].flat_map do |page_html|
    # Block form of File.open so the handle is closed as soon as parsing is
    # done, instead of staying open for every page that gets scanned.
    doc = File.open(page_html) { |file| Nokogiri::HTML(file) }
    doc.css("div.episode h2 a").map do |link|
      "http://railscasts.com#{link['href']}".tap { |url| puts url }
    end
  end
end
# Returns the part of +episode_link+ that follows its last "/".
#
# @param episode_link [String] a URL or path containing at least one "/"
#   followed by one or more characters
# @return [String] the trailing segment
# @raise [NoMethodError] when the link does not match (no "/" followed by text)
def extract_episode_name(episode_link)
  trailing_segment = /.*\/(.+)/.match(episode_link)
  trailing_segment.captures.first
end
# Queues the download of one episode page into episode_pages/<name>.html,
# where <name> is what extract_episode_name returns for the link.
#
# @param episode_link [String] URL of the episode page
# @return [Object, nil] whatever download returns
def download_episode_pages(episode_link)
  episode_name = extract_episode_name(episode_link)
  download(episode_link, "episode_pages/#{episode_name}.html")
end
# Collects the href of every anchor ending in "mp4" from the saved episode
# pages matching episode_pages/*.html, printing each one as it is found.
#
# @return [Array<String>] the matching href values, in glob order
def extract_mp4_links
  Dir["episode_pages/*.html"].flat_map do |page_html|
    # Block form of File.open so the handle is closed as soon as parsing is
    # done, instead of staying open for every page that gets scanned.
    doc = File.open(page_html) { |file| Nokogiri::HTML(file) }
    hrefs = doc.css("a").map { |anchor| anchor['href'] }
    # Anchors without an href yield nil, and nil =~ regexp is simply nil.
    hrefs.select { |href| href =~ /mp4$/ }.each { |mp4_url| puts mp4_url }
  end
end
# On an INT signal, print a short notice and exit the script.
Signal.trap(:INT) do
  puts "INT signal caught, now exiting"
  exit
end
# The script is driven in three steps; steps 1 and 2 are currently commented out.
# step 1: create the pages/ directory, then run ->
#download_pages(48)
# step 2: create the episode_pages/ directory, then run ->
#extract_episode_links.each { |link| download_episode_pages(link) }
# step 3: queue every mp4 found in the saved episode pages; files are written
# under videos/, named after the last path segment of each link.
extract_mp4_links.each do |mp4_link|
  video_path = "videos/#{extract_episode_name(mp4_link)}"
  download(mp4_link, video_path)
end
# puts extract_episode_name("http://railscasts.com/episodes/90-fragment-caching-revised")
# NOTE(review): presumably shutdown lets the queued downloads finish before the
# script exits - confirm against the thread/pool documentation.
$pool.shutdown
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment