Skip to content

Instantly share code, notes, and snippets.

@trevex
Created August 25, 2015 19:56
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trevex/b8392fb5be2de777b898 to your computer and use it in GitHub Desktop.
Save trevex/b8392fb5be2de777b898 to your computer and use it in GitHub Desktop.
Simple web scraper that lists the audio files hosted on gamemusic.siroro.co.uk/ so that they can be downloaded.
// Third-party modules: `request` performs the HTTP GET, `cheerio` parses the returned HTML.
var request = require("request"),
cheerio = require("cheerio"),
// Base URL of the site: the crawl starts here and every scraped href is appended to it.
root = "http://gamemusic.siroro.co.uk/";
/**
 * Crawl one directory-listing page and print every file found beneath it.
 *
 * Fetches `url` and inspects each `span.file-name` entry on the page:
 *   - entries named ".." are ignored;
 *   - entries without a `.fa-folder` child are reported on stdout as
 *     `FOUND:\t"<dir><name>"\t<link>` (tab separated);
 *   - entries with a `.fa-folder` child are crawled in turn, using
 *     `dir + name + "/"` as their prefix.
 * A page that contains a `div.alert` element is skipped entirely.
 *
 * NOTE: inside this file the name `process` shadows Node's global `process`
 * object, so that global is not reachable by its usual name here.
 *
 * @param {string} url - Address of the listing page to fetch.
 * @param {string} dir - Path prefix prepended to every name printed for this page.
 */
function process(url, dir) {
    request(url, function(error, response, body) {
        if (error) {
            console.log("RESPONSE ERROR: "+error);
            return;
        }
        var $ = cheerio.load(body);
        if ($("div.alert")[0]) {
            return; // the page shows an alert box: nothing to crawl
        }
        $("span.file-name").each(function(index, element) {
            var entry = $(element),
                name = entry.text().trim(),
                href = entry.parent().parent().attr("href"),
                isFolder = Boolean(entry.children(".fa-folder")[0]);
            if (name === "..") {
                return; // link back to the parent directory
            }
            if (typeof href !== "string") {
                // Without an href the link would become root + "undefined".
                // Reported on stderr so the stdout listing stays clean.
                console.error("SKIPPED (no href): \""+dir+name+"\"");
                return;
            }
            var link = root + href;
            if (!isFolder) {
                console.log("FOUND:\t\""+dir+name+"\"\t"+link);
                return;
            }
            // Pass the function and its arguments to setTimeout. Writing
            // setTimeout(process(...), 0) would run the crawl immediately and
            // schedule `undefined`, which Node rejects with a TypeError.
            setTimeout(process, 0, link, dir+name+"/");
        });
    });
}
// Start the crawl at the site root; the empty string is the initial path prefix.
process(root, "");

It can be used like this:

node scraper.js > results.txt 
cat results.txt | awk -F '\t' '{system("curl --create-dirs --retry 5 -z " $2 " -o " $2 " " $3 )}'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment