Skip to content

Instantly share code, notes, and snippets.

@alexanderGugel
Created July 16, 2014 01:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexanderGugel/bad6ffe1f48adc6a09c3 to your computer and use it in GitHub Desktop.
Save alexanderGugel/bad6ffe1f48adc6a09c3 to your computer and use it in GitHub Desktop.
The Pirate Bay scraper
var request = require('request'),
_ = require('lodash');
// Top-level category codes, each annotated with the section it stands for.
var categories = [
  100, // Audio
  200, // Video
  300, // Applications
  400, // Games
  500, // Porn
  600  // Other
];
// Builds the browse URL for one listing page.
// categoryCode: One of the category codes (100, 200, ... 600).
// page: Zero-based page index.
// order: Sort key, 7 (Seeders) or 9 (Leechers), both descending.
var createURL = function (categoryCode, page, order) {
  var path = categoryCode + '/' + page + '/' + order;
  return 'http://thepiratebay.se/browse/' + path;
};
// Extracts every double-quoted magnet URI found in a chunk of text.
// body: Text to scan (e.g. the HTML of a listing page).
// Returns an array of magnet URIs without their surrounding quotes, in order
// of appearance. Returns an empty array when body is not a string or when
// nothing matches.
var extractMagnetURIs = function (body) {
  if (typeof body !== 'string') {
    return [];
  }
  // Quotes are excluded from the URI itself: a greedy \S+ would run past the
  // closing quote whenever another quoted attribute follows without whitespace.
  var quoted = body.match(/"magnet:\?[^"\s]+"/g) || [];
  return quoted.map(function (match) {
    return match.slice(1, -1); // drop the leading and trailing "
  });
};
// Callback for request(): prints the magnet URIs found in the response body.
// err: Error reported for the request, if any.
// resp: Response object; only its statusCode is read here.
// body: Response body, handed to extractMagnetURIs.
var onResponse = function (err, resp, body) {
  if (err) {
    // Report err itself: resp is typically not populated when the request
    // failed, so logging it would hide the actual cause.
    return console.error('Error scraping: ' + (err.message || err));
  }
  if (resp && resp.statusCode !== 200) {
    // An error page carries no listing, so report it instead of scraping it.
    return console.error('Error scraping: unexpected HTTP status ' + resp.statusCode);
  }
  console.log(extractMagnetURIs(body));
};
// Request page 0 of every category, sorted by leechers (descending), and hand
// each response to onResponse.
categories.forEach(function (code) {
  request(createURL(code, 0, 9), onResponse);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment