Skip to content

Instantly share code, notes, and snippets.

@Bernardstanislas
Last active August 29, 2015 14:22
Show Gist options
  • Save Bernardstanislas/af3416cfcfb4c51c6c9b to your computer and use it in GitHub Desktop.
Save Bernardstanislas/af3416cfcfb4c51c6c9b to your computer and use it in GitHub Desktop.
OMDB scraper
var http = require('http');
var fs = require('fs');
// ---- Scraper settings -------------------------------------------------
// How many request chains run concurrently; each chain also advances its
// id by this amount after every request.
var runningSockets = 1000;
// Exclusive upper bound of the numeric ids that get requested.
var maxId = 10000000;
// Name of the output file, created in the same directory as this script.
var filename = 'dump';

// The global HTTP agent must allow at least `runningSockets` parallel
// sockets, otherwise the extra requests would queue up behind its cap.
if (http.globalAgent.maxSockets < runningSockets) http.globalAgent.maxSockets = runningSockets;

// Output stream for the fetched records; the 'w' flag truncates any dump
// left over from a previous run.
var file_stream = fs.createWriteStream([__dirname, filename].join('/'), {flags: 'w'});
// Number of request chains that have stepped past maxId. Once it equals
// runningSockets every chain is finished and the process may shut down.
var finishedChains = 0;
// A request whose socket stays idle this long (in ms) is destroyed so that
// a stalled connection cannot keep its chain, and the process, alive forever.
var requestTimeoutMs = 30000;

/**
 * Fetches the record for one id and then continues with the next id of the
 * same chain (index + runningSockets).
 *
 * The request goes to www.omdbapi.com with the id formatted as "tt" followed
 * by the index zero-padded to 7 digits. A response body that starts with '{'
 * is appended to file_stream as one line; any other body is discarded.
 * Request errors, response errors and timeouts skip the id and move on.
 *
 * When index reaches maxId the chain stops. The last chain to stop closes
 * file_stream and exits the process once the stream has been flushed.
 *
 * @param {number} index - numeric id to request.
 */
function createRequest(index) {
    // Only ids that are exact multiples of 1% of maxId report progress.
    if (index % (maxId / 100) === 0) {
        console.log((index / (maxId / 100)) + '% done');
    }

    if (index >= maxId) {
        finishedChains++;
        // Exit only after every chain is done, and only once the buffered
        // writes have reached the file; exiting earlier would drop the
        // requests still in flight on the other chains.
        if (finishedChains === runningSockets) {
            file_stream.end(function() {
                process.exit(0);
            });
        }
        return;
    }

    // Several terminal events may fire for a single request (for example
    // 'error' followed by 'close'); the chain must advance exactly once.
    var advanced = false;
    function next() {
        if (advanced) {
            return;
        }
        advanced = true;
        createRequest(index + runningSockets);
    }

    var req = http.request({
        host: 'www.omdbapi.com',
        port: 80,
        path: '/?i=tt' + pad(index, 7) + '&plot=short&r=json',
        method: 'GET'
    }, function(res) {
        var msg = '';
        res.setEncoding('utf8');
        res.on('data', function(chunk) {
            msg += chunk;
        });
        res.on('error', next);
        // Covers a connection that is torn down mid-response without an
        // 'end'; after a normal 'end' this is a no-op thanks to the guard.
        res.on('close', next);
        res.on('end', function() {
            // Keep only bodies that look like a JSON object.
            if (msg.charAt(0) === '{') {
                file_stream.write(msg + '\n');
            }
            next();
        });
    });

    // Without this listener a socket-level failure (connection reset, DNS
    // error, ...) is an unhandled 'error' event and crashes the process.
    req.on('error', next);
    // Destroying the request surfaces as an 'error' or 'close' event above,
    // which advances the chain.
    req.setTimeout(requestTimeoutMs, function() {
        req.destroy();
    });
    req.end();
}
// Start one request chain per concurrent socket. Chain j requests ids
// j, j + runningSockets, j + 2 * runningSockets, ... so together the chains
// cover every id exactly once.
// `j` is declared explicitly: assigning to an undeclared name creates an
// implicit global and throws in strict mode.
for (var j = 0; j < runningSockets; j++) {
    createRequest(j);
}
/**
 * Left-pads a number with zeros to a minimum width.
 *
 * A number that already has `size` or more digits is returned unchanged
 * rather than being cut down, so an id is never silently altered.
 *
 * @param {number|string} num - value to format.
 * @param {number} size - minimum length of the result.
 * @returns {string} `num` as a string, prefixed with '0' up to `size` characters.
 */
function pad(num, size) {
    var s = String(num);
    while (s.length < size) {
        s = '0' + s;
    }
    return s;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment