Skip to content

Instantly share code, notes, and snippets.

@miguelmota
Created June 2, 2014 19:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save miguelmota/2884c2565f23f82fbcd8 to your computer and use it in GitHub Desktop.
Save miguelmota/2884c2565f23f82fbcd8 to your computer and use it in GitHub Desktop.
Read in an HTML file, look for image URLs, and download them with Node.js
var cheerio = require('cheerio');
var fs = require('fs');
var concat = require('concat-stream');
var _ = require('lodash');
var request = require('request');
// Script entry point: read raw.html (next to this script), and once it has been
// fully buffered hand the markup to processData(), which triggers the downloads
// into the images directory (also next to this script).
var stream = fs.createReadStream([__dirname, '/raw.html'].join(''));
var imagesDir = [__dirname, '/images'].join('');

// Create the output directory BEFORE the HTML is piped and parsed. Previously
// the directory check and the read pipeline were started independently, so
// download() could try to open write streams inside a directory that did not
// exist yet.
//
// mkdir is attempted directly instead of the deprecated fs.exists() check
// (check-then-create is racy); an already existing directory is reported as
// EEXIST, which is fine here. The mode argument is omitted: fs.mkdir defaults
// to 0777 (subject to the process umask), and the legacy octal literal `0777`
// is a SyntaxError in strict mode / ES modules.
fs.mkdir(imagesDir, function(err) {
  // Rethrow the original error object so its code and stack are preserved.
  if (err && err.code !== 'EEXIST') throw err;

  // The read stream only starts flowing once it is piped, so no data is lost
  // by deferring the pipe until the directory is ready.
  stream.pipe(concat(function(body) {
    processData(body.toString());
  }));
});
/**
 * Download one URL into the `images` directory next to this script.
 *
 * The file name is the last path segment of the URL, with any query string
 * or fragment removed. Progress is reported on the console: `<name> done` on
 * success, `<name> failed: <reason>` on error.
 *
 * @param {string} url - Absolute URL of the resource to fetch. Non-string or
 *     empty values are ignored.
 * @param {number} [i] - Index supplied by the iterating caller; unused, kept
 *     so the function can be passed directly as an iteratee.
 * @returns {undefined}
 */
function download(url, i) {
  // Elements without the expected attribute yield `undefined`; skip them
  // instead of crashing on `undefined.split(...)`.
  if (typeof url !== 'string' || url === '') {
    return;
  }

  // Drop "?query" and "#fragment" so they do not end up in the file name.
  var filename = url.split(/[?#]/)[0].split('/').pop();

  // A URL ending in "/" (or in "." / "..") has no usable file name; writing
  // to it would target a directory rather than a file.
  if (filename === '' || filename === '.' || filename === '..') {
    console.error('skipping', url, '(no usable file name)');
    return;
  }

  var filepath = [__dirname, '/images/', filename].join('');
  var failed = false;

  function reportFailure(err) {
    // Both the request and the file stream may report the same failure.
    if (failed) return;
    failed = true;
    console.error(filename, 'failed:', err && err.message ? err.message : err);
  }

  var file = fs.createWriteStream(filepath);
  file.on('error', reportFailure);
  file.on('close', function() {
    if (!failed) console.log(filename, 'done');
  });

  // Without an 'error' listener a single failed request would surface as an
  // unhandled 'error' event and abort every other download in progress.
  request(url)
    .on('error', function(err) {
      reportFailure(err);
      // pipe() does not end the destination when the source errors.
      file.end();
    })
    .pipe(file);
}
/**
 * Parse an HTML document and start a download for every URL found in a
 * `data-href` attribute on a <div> element.
 *
 * @param {string} data - HTML markup to scan.
 * @returns {undefined}
 */
function processData(data) {
  var $ = cheerio.load(data);
  var urls = [];

  // Only match divs that actually carry the attribute; selecting every <div>
  // pushed `undefined` for the rest, which download() cannot handle.
  $('div[data-href]').each(function() {
    var href = $(this).attr('data-href');
    // The attribute may be present but empty (data-href="").
    if (href) {
      urls.push(href);
    }
  });

  // De-duplicate so the same file is not written by two concurrent downloads.
  _.each(_.uniq(urls), download);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment