Skip to content

Instantly share code, notes, and snippets.

@Maffsie
Created November 16, 2009 17:07
Show Gist options
  • Save Maffsie/236140 to your computer and use it in GitHub Desktop.
Save Maffsie/236140 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'net/http'
require 'sqlite3'
require 'hpricot'
require 'open-uri'
# Crawls the browse listings of each category on the configured domain, follows
# every torrent detail link, downloads the linked .torrent file into
# `torrents/`, and records (url, filename, description) in `torrents.sqlite`.
#
# A category is considered exhausted once a listing page matches
# /Forbidden.$/; that page is still scanned for links before moving on.

# Fetches `path` from `domain` over plain HTTP and returns the response body
# as a String (empty when the response carries no body).
def fetch_body(domain, path)
  Net::HTTP.start(domain) { |http| http.get(path).body.to_s }
end

category_ids = [100, 200, 300, 400, 600]
domain = 'thepiratebay.org'
url = '/browse/%s/%s/3'
torrent_dir = 'torrents'

# The download target directory must exist before the first File.open.
Dir.mkdir(torrent_dir) unless File.directory?(torrent_dir)

db = SQLite3::Database.new('torrents.sqlite')
db.execute('CREATE TABLE IF NOT EXISTS `torrents` (`url` TEXT NOT NULL,`filename` TEXT NOT NULL,`description` TEXT NULL)')

category_ids.each do |category|
  page_num = 0
  loop do
    page = fetch_body(domain, url % [category, page_num])
    last_page = !(/Forbidden.$/.match(page)).nil?

    # For each torrent on the listing page, download its detail page, then
    # pull out the torrent description and the download link.
    (Hpricot.parse(page) / '//a[@class="detLink"]').each do |link|
      detail = Hpricot.parse(fetch_body(domain, link.attributes['href'].to_s))

      # Start from nil for every torrent so a page without an nfo block does
      # not inherit the description of the previous torrent.
      description = nil
      (detail / '//div[@class="nfo"]').each { |nfo| description = nfo.inner_html.to_s }

      anchor = detail.search('//div[@class="download"]').at('a')
      if anchor.nil?
        warn "No download link found on #{domain}#{link.attributes['href']}"
        next
      end
      download = anchor['href'].to_s

      # Download via URI#open (open-uri) rather than Kernel#open, which no
      # longer accepts URLs on Ruby 3.0+. Failures skip this torrent instead
      # of writing an empty file and a bogus database row.
      begin
        source = URI.parse(download)
        unless source.respond_to?(:open)
          warn "Could not download torrent file: unsupported URL #{download.inspect}"
          next
        end
        torrent_data = source.open('User-Agent' => 'Ruby-Wget').read
      rescue URI::InvalidURIError, OpenURI::HTTPError => e
        warn "Could not download torrent file #{download.inspect}: #{e.message}"
        next
      end

      # Strip the tracker prefix as before, then keep only the final path
      # component: the link comes from remote HTML and must not be able to
      # point outside the torrents directory.
      stripped = download.gsub(/http:\/\/torrents.thepiratebay.org\/[0-9]{1,}\//, '')
      tor_file = File.basename(stripped)

      # .torrent files are binary, so write them in binary mode.
      File.open(File.join(torrent_dir, tor_file), 'wb') { |f| f.write(torrent_data) }

      # Bind values are passed as a single array; recent sqlite3 gems no
      # longer accept them as separate trailing arguments.
      db.execute('INSERT INTO `torrents` (url, filename, description) VALUES (?, ?, ?)',
                 [download, tor_file, description])
    end

    break if last_page

    page_num += 1
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment