Skip to content

Instantly share code, notes, and snippets.

@yuanchuan
Created August 2, 2012 02:12
Show Gist options
  • Save yuanchuan/3232510 to your computer and use it in GitHub Desktop.
Save yuanchuan/3232510 to your computer and use it in GitHub Desktop.
download pages with nodejs
var fs = require('fs')
, http = require('http')
, path = require('path')
, cheerio = require('cheerio');
// Index page that lists every archived picture page, the URL prefix the
// relative links on that page are resolved against, and the local directory
// the pages are saved into.
// NOTE: nothing in this script creates `target`; it must exist before running.
var archive = 'http://apod.nasa.gov/apod/archivepix.html'
  , base = path.dirname(archive) + '/'
  , target = 'archive/english/';

/**
 * Tell whether an href taken from the remote page is safe to use verbatim as
 * a file name inside `target`.
 *
 * Rejects missing hrefs (anchors without the attribute), empty strings, the
 * special names `.` and `..`, and anything containing a directory component
 * or a scheme/host (for those, `path.basename` differs from the input).
 * This keeps a hostile or malformed page from making the script write
 * outside `target`.
 *
 * @param {*} href - Value returned by `$(el).attr('href')`.
 * @returns {boolean} True when `href` is a bare file name.
 */
function isPlainFileName(href) {
  return typeof href === 'string'
    && href !== ''
    && href !== '.'
    && href !== '..'
    && path.basename(href) === href;
}

// Fetch the index, collect every usable link found inside a <b> element and
// download each linked page, logging progress as downloads complete.
get(archive, function(html) {
  var $ = cheerio.load(html)
    , pages = []
    , count = 0;

  $('b').find('a').each(function() {
    var url = $(this).attr('href');
    if (isPlainFileName(url)) {
      pages.push(url);
    }
  });

  pages.forEach(function(url) {
    save(base + url, target + url, function() {
      console.log('got %s, total: %s/%s', url, ++count, pages.length);
    });
  });
});
/**
 * Fetch `url` over HTTP and pass the complete response body to `callback`.
 *
 * The body is decoded as UTF-8 and accumulated in memory. Failures are logged
 * with a `Got error: ` prefix and `callback` is NOT invoked in that case:
 *   - the request itself fails (connection refused, DNS error, ...),
 *   - the server answers with a status other than 200 (so a redirect or an
 *     error page is never mistaken for the requested document),
 *   - the response stream errors part-way through.
 *
 * @param {string} url - Address to fetch.
 * @param {function(string)} callback - Receives the full body on success.
 */
function get(url, callback) {
  function fail(e) {
    console.log('Got error: ' + e.message);
  }

  http.get(url, function(res) {
    if (res.statusCode !== 200) {
      // Drain the body so the underlying socket is released.
      res.resume();
      fail(new Error('unexpected status ' + res.statusCode + ' for ' + url));
      return;
    }

    var body = '';
    res.setEncoding('utf-8');
    res.on('data', function(chunk) {
      body += chunk;
    });
    res.on('error', fail);
    res.on('end', function() {
      callback(body);
    });
  }).on('error', fail);
}
/**
 * Download `remote` into the local file `target`, unless `target` already
 * exists.
 *
 * The response is streamed to disk as raw bytes (no text decoding), so pages
 * in any encoding and binary resources are stored unmodified.
 *
 * `callback` is invoked with no arguments once the file is in place: at once
 * if it already existed, otherwise after the write stream has flushed
 * everything. On failure (request error, non-200 status, response or file
 * error) the problem is logged with a `Got error: ` prefix, any partially
 * written file is removed, and `callback` is NOT invoked.
 *
 * @param {string} remote - URL to download.
 * @param {string} target - Local file path to write; its directory must exist.
 * @param {function()} callback - Called on success only.
 */
function save(remote, target, callback) {
  if (fs.existsSync(target)) {
    callback();
    return;
  }

  var failed = false;

  function fail(e) {
    if (failed) {
      return;
    }
    failed = true;
    console.log('Got error: ' + e.message);
  }

  http.get(remote, function(res) {
    if (res.statusCode !== 200) {
      // Drain the body so the underlying socket is released.
      res.resume();
      fail(new Error('unexpected status ' + res.statusCode + ' for ' + remote));
      return;
    }

    var file = fs.createWriteStream(target);

    function abort(e) {
      if (failed) {
        return;
      }
      res.destroy();
      file.destroy();
      // Best-effort cleanup: a leftover partial file would be treated as a
      // finished download by the existsSync() shortcut on the next run.
      // The unlink error is deliberately ignored (the file may never have
      // been created, e.g. when the target directory is missing).
      fs.unlink(target, function() {});
      fail(e);
    }

    res.on('error', abort);
    file.on('error', abort);
    // 'finish' fires after all data has been written out, unlike the
    // response's 'end', which only means the network side is done.
    file.on('finish', callback);
    res.pipe(file);
  }).on('error', fail);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment