Skip to content

Instantly share code, notes, and snippets.

@shirishp
Last active February 22, 2019 21:17
Show Gist options
  • Save shirishp/b594303ec63f6d9c075b to your computer and use it in GitHub Desktop.
Save shirishp/b594303ec63f6d9c075b to your computer and use it in GitHub Desktop.
Scrape Dilbert strips from http://dilbert.com/ for a given date range
var request = require("request"),
cheerio = require("cheerio"),
fs = require('fs');
// Origin that every relative page path below is appended to.
var site = 'http://dilbert.com';
// The crawl stops once the "next" link equals this path (that page is not fetched).
var finalPage = "/strip/2006-01-01";
// First strip page to fetch.
var startingPage = '/strip/2005-01-01';
// Path of the page currently being scraped; scrapeStrip reassigns it as the crawl advances.
var nextPage = startingPage;
/**
 * Download the resource at `fromUrl` and stream it into the local file `fileName`.
 *
 * The parent directory of `fileName` is created first when it is missing, and the
 * "Saved image" message is logged only after the file has been completely written.
 * Stream and directory errors are logged instead of being left unhandled.
 *
 * @param {string} fromUrl - URL of the image to fetch; a falsy value is logged and skipped.
 * @param {string} fileName - Destination path, may contain directories (e.g. "strip/2005-01-01.jpg").
 */
function downloadFileInFolder(fromUrl, fileName) {
  if (!fromUrl) {
    console.log('No image URL to download for ' + fileName);
    return;
  }

  var reportFailure = function (err) {
    console.log('Failed to save image ' + fileName + ': ' + err);
  };

  // createWriteStream does not create missing folders, so make sure the target folder exists.
  var lastSeparator = fileName.lastIndexOf('/');
  if (lastSeparator > 0) {
    try {
      fs.mkdirSync(fileName.slice(0, lastSeparator), { recursive: true });
    } catch (err) {
      reportFailure(err);
      return;
    }
  }

  request(fromUrl)
    .on('error', reportFailure)
    .pipe(fs.createWriteStream(fileName))
    .on('error', reportFailure)
    // 'finish' fires once all data has been flushed to the file.
    .on('finish', function () {
      console.log('Saved image ' + fileName);
    });
}
/**
 * `request` callback that handles one strip page and schedules the following one.
 *
 * Loads the page HTML, downloads the image referenced by `.img-comic`, then follows the
 * `.nav-right>a` link. The crawl ends when that link equals `finalPage` (the final page
 * itself is not fetched), when the page has no such link, or when the request failed.
 *
 * Reads and reassigns the module-level `nextPage`.
 *
 * @param {*} error - Error passed by `request`; falsy on success.
 * @param {Object} response - Response object passed by `request` (not inspected here).
 * @param {string} body - HTML of the page at `site + nextPage`.
 */
var scrapeStrip = function (error, response, body) {
  console.log('Now at ' + site + nextPage);
  if (error) {
    console.log("We’ve encountered an error: " + error);
    return;
  }

  var $ = cheerio.load(body);
  var stripImageUrl = $(".img-comic").attr('src');
  if (stripImageUrl) {
    console.log("Found strip at " + stripImageUrl);
    // Drop the leading "/" so the image is saved under a relative path.
    downloadFileInFolder(stripImageUrl, nextPage.slice(1) + '.jpg');
  } else {
    // attr() yields undefined when nothing matches; never hand that to the downloader.
    console.log('No strip image found at ' + site + nextPage);
  }

  nextPage = $('.nav-right>a').attr('href');
  console.log('Next page is ' + nextPage);
  if (!nextPage) {
    // Without this guard the crawler would request site + "undefined".
    console.log('No next page link found; stopping.');
    return;
  }
  if (nextPage !== finalPage) {
    request(site + nextPage, null, scrapeStrip);
  }
};
// Start the crawl: fetch the page at site + nextPage and hand the result to scrapeStrip,
// which issues the request for each following page itself.
request(site + nextPage, null, scrapeStrip);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment