Skip to content

Instantly share code, notes, and snippets.

@ForbesLindesay
Created March 10, 2013 20:46
Show Gist options
  • Save ForbesLindesay/5130350 to your computer and use it in GitHub Desktop.
Save ForbesLindesay/5130350 to your computer and use it in GitHub Desktop.
Download all the octocats using just hyperquest and through
var join = require('path').join;
var fs = require('fs');
var request = require('hyperquest');
var through = require('through');
// Create the output directory for the downloaded images. An existing
// directory (e.g. from a previous run) is fine; any other failure is fatal.
try {
  fs.mkdirSync(join(__dirname, 'octocats'));
} catch (err) {
  if (err.code !== 'EEXIST') throw err;
}

// Pipeline: fetch the index page -> extract image URLs -> download each
// image -> format one report line per finished download -> print it.
request('http://octodex.github.com/')
  .pipe(parseURLs())
  .pipe(download())
  .pipe(format())
  .pipe(process.stdout);
/**
 * Create a through stream that scans incoming text for octodex image URLs
 * and queues every URL it finds as its own chunk.
 *
 * Incoming chunks are appended to a buffer, so a URL that is split across
 * two chunks is still recognised once the rest of it arrives. A match that
 * reaches the very end of the buffer may be a truncated URL (its extension
 * could continue in the next chunk), so it is held back until more data
 * arrives or the input ends.
 *
 * @return {Stream} through stream: text/Buffer chunks in, URL strings out
 */
function parseURLs() {
  var buffer = '';
  // No `g` flag, so exec() is stateless and the literal can be shared.
  var pattern = /(http:\/\/octodex\.github\.com\/images\/[a-z0-9\-]+\.[a-z]+)/i;

  // Queue every complete URL currently in the buffer. When `flush` is true
  // the input is finished, so a match touching the buffer end is complete.
  function drain(stream, flush) {
    var match;
    while ((match = pattern.exec(buffer))) {
      var matchEnd = match.index + match[1].length;
      if (!flush && matchEnd === buffer.length) {
        // Possibly cut off mid-extension: keep the candidate and wait.
        buffer = buffer.substring(match.index);
        return;
      }
      stream.queue(match[1]);
      buffer = buffer.substring(matchEnd);
    }
  }

  return through(function (data) {
    buffer += data.toString();
    drain(this, false);
  }, function () {
    drain(this, true);
    this.queue(null);
  });
}
/**
 * Create a through stream that downloads every URL written to it into the
 * `octocats` directory next to this script, and queues one record per
 * completed download: `{ url, file, start, end }`.
 *
 * Downloads start as soon as a URL arrives and run concurrently. The output
 * is ended only after the input has ended AND every started download has
 * settled, whether it succeeded or failed. A failed download emits 'error'
 * on the stream and produces no record.
 *
 * @return {Stream} through stream: URL strings in, download records out
 */
function download() {
  var remaining = 0;      // downloads started but not yet settled
  var inputEnded = false; // true once upstream has signalled end

  return through(function (url) {
    var self = this;
    var name = url.replace(/^.*\//g, ''); // last path segment of the URL
    var start = new Date();
    var settled = false;
    remaining++;

    // Record the outcome of this download exactly once. 'error' and 'close'
    // can both fire for the same file, so later calls are ignored.
    function settle(err) {
      if (settled) return;
      settled = true;
      remaining--;
      if (err) {
        self.emit('error', err);
      } else {
        self.queue({
          url: url,
          file: name,
          start: start,
          end: new Date()
        });
      }
      if (inputEnded && remaining === 0) self.queue(null);
    }

    var req = request(url);
    var file = fs.createWriteStream(join(__dirname, 'octocats', name));
    req.on('error', function (err) {
      // pipe() does not close the destination when the source fails.
      file.destroy();
      settle(err);
    });
    file.on('error', settle);
    file.on('close', function () { settle(); });
    req.pipe(file);
  }, function () {
    inputEnded = true;
    if (remaining === 0) this.queue(null);
  });
}
/**
 * Create a through stream that turns each download record into one line of
 * text: the file name followed by the elapsed time in parentheses.
 *
 * @return {Stream} through stream: download records in, text lines out
 */
function format() {
  function writeLine(record) {
    var elapsed = record.end - record.start;
    var line = record.file + ' (' + humanize(elapsed) + ')\n';
    this.queue(line);
  }
  return through(writeLine);
}
/**
 * Render a duration in milliseconds as a short human-readable string.
 *
 * Hours and minutes are shown with one decimal place ('1.5h', '2.0m'),
 * seconds are truncated to a whole number ('12s'), and anything below one
 * second is shown as raw milliseconds ('250ms').
 *
 * @param {Number} ms duration in milliseconds
 * @return {String}
 * @api private
 */
function humanize(ms) {
  var SECOND = 1000;
  var MINUTE = 60 * SECOND;
  var HOUR = 60 * MINUTE;
  var text;

  if (ms >= HOUR) {
    text = (ms / HOUR).toFixed(1) + 'h';
  } else if (ms >= MINUTE) {
    text = (ms / MINUTE).toFixed(1) + 'm';
  } else if (ms >= SECOND) {
    // `| 0` truncates the fractional part of the second count.
    text = ((ms / SECOND) | 0) + 's';
  } else {
    text = ms + 'ms';
  }
  return text;
}
@paulmillr
Copy link

bash still ftw

# Download all cats in parallel.
# xargs -P runs up to 8 curl processes at once and returns only after all of
# them finish, so no trailing `&; wait` (a syntax error in bash) is needed.
# sort -u drops repeated names so two curls never write the same file.
url=http://octodex.github.com
curl --silent "$url/" | grep --extended-regexp --only-matching "/images/[a-zA-Z0-9.-]+" | sed -e 's/\/images\///' | sort -u | xargs -P 8 -I file curl "$url/images/file" --silent --output file

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment