// Usage: save the scraper's output to a file, then download every file it found:
//   node scraper.js > results.txt
//   cat results.txt | awk -F '\t' '{system("curl --create-dirs --retry 5 -z " $2 " -o " $2 " " $3 )}'
// Third-party modules: `request` fetches each page, `cheerio` parses the returned HTML.
var request = require("request"),
    cheerio = require("cheerio"),
    // Base URL of the site to crawl; it is also prefixed to every href found in a listing.
    root = "http://gamemusic.siroro.co.uk/";
/**
 * Crawl one listing page and print every file it links to, descending into folders.
 *
 * Fetches `url` and walks every `span.file-name` element of the returned HTML:
 *   - an entry named ".." is ignored;
 *   - an entry without a `.fa-folder` child is reported on stdout as
 *     `FOUND:<TAB>"<dir><name>"<TAB><link>`;
 *   - any other entry is treated as a folder and crawled in turn, with
 *     `<dir><name>/` as the new path prefix.
 * A page that contains a `div.alert` element is skipped without output.
 * Request failures are reported on stdout as `RESPONSE ERROR: <error>`.
 *
 * NOTE: inside this module the name `process` shadows Node's global `process`
 * object; reach the global through `global.process` if it is ever needed here.
 *
 * @param {string} url - Address of the listing page to fetch.
 * @param {string} dir - Path prefix put in front of each entry name ("" at the top level).
 */
function process(url, dir) {
  request(url, function (error, response, body) {
    if (error) {
      console.log("RESPONSE ERROR: " + error);
      return;
    }

    var $ = cheerio.load(body);

    // Presumably the site's error banner; such pages are not parsed as listings.
    if ($("div.alert")[0]) {
      return;
    }

    $("span.file-name").each(function (index, element) {
      var entry = $(element);
      var name = entry.text().trim();

      // ".." is the link back to the parent listing; following it would loop.
      if (name === "..") {
        return;
      }

      // The href is read from the entry's grandparent element.
      var link = root + entry.parent().parent().attr("href");
      var isFolder = Boolean(entry.children(".fa-folder")[0]);

      if (!isFolder) {
        console.log("FOUND:\t\"" + dir + name + "\"\t" + link);
        return;
      }

      // Pass the function and its arguments to setTimeout. Writing
      // `setTimeout(process(...), 0)` would run the crawl immediately and hand
      // setTimeout `undefined` as its callback, which throws on current Node.
      setTimeout(process, 0, link, dir + name + "/");
    });
  });
}
// Start the crawl at the site root with an empty path prefix.
process(root, "");