Skip to content

Instantly share code, notes, and snippets.

@russellbeattie
Created March 21, 2013 09:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save russellbeattie/5211900 to your computer and use it in GitHub Desktop.
Save russellbeattie/5211900 to your computer and use it in GitHub Desktop.
var utils = require('../utils.js'),
pg = require('pg').native,
http = require('http'),
httpreq = require('./httpreq.js'),
async = require('async'),
events = require('events'),
lodash = require('lodash');
// In-process event bus. Handlers registered further down listen for
// 'processSource', 'checkCount' and 'error'.
var app = new events.EventEmitter();
var config = require('../config.js');

// Connection defaults for every pg client created by this script.
pg.defaults.user = config.db.user;
pg.defaults.database = config.db.database;
pg.defaults.host = config.db.host;
pg.defaults.poolSize = 10;
pg.defaults.rows = 1;

// Raise the default agent's per-host socket cap so that many outbound
// requests can be open at the same time.
http.globalAgent.maxSockets = 1000;

var db = new pg.Client();
db.connect();

// Dumped with console.dir when the process exits.
var keys = [];

// Number of requests currently in flight: incremented by the
// 'processSource' handler and decremented by the 'checkCount' handler.
// Declared with `var` so it is a module-level variable instead of an
// implicit global.
var count = 0;

// Start timestamp, read by the exit handler to report the elapsed time.
var start = new Date();
console.log('Starting');
// Default run: every source whose last recorded error code is not 404,
// ordered by lastupdate.
var idsSQL = 'select id, sourceurl from allsources where errorcode != 404 order by lastupdate';
var idsParams = [];
if (process.argv.length > 2) {
    // Single-source run: the id comes straight from the command line, so it
    // is bound as a query parameter instead of being concatenated into the
    // SQL text.
    idsSQL = 'select id, sourceurl from allsources where id = $1';
    idsParams = [process.argv[2]];
}

// Work queue: at most 400 sources are processed concurrently. The worker
// hands each row to the 'processSource' handler, which must invoke
// `callback` to release its slot.
var q = async.queue(function (row, callback) {
    app.emit('processSource', row, callback);
}, 400);

// Only pass a values array when there is something to bind, so the default
// run issues exactly the same call as before.
var query = idsParams.length > 0 ? db.query(idsSQL, idsParams) : db.query(idsSQL);

// Rows are queued as they arrive from the database.
query.on('row', function (row) {
    q.push(row, function () {
        // Nothing to do when an individual row finishes.
    });
});
// On exit, print the total run time in seconds followed by the contents of
// `keys`.
process.on('exit', function () {
    // Subtracting two Dates yields milliseconds; divide by 1000 for seconds.
    var elapsedMs = new Date() - start;
    console.log('Elapsed time: ' + (elapsedMs / 1000));
    console.dir(keys);
});
// Bookkeeping hook emitted once per finished request: drops the in-flight
// counter, closes the database connection when it reaches zero, and prints
// the counter whenever it is a multiple of 1000 (which includes zero).
app.on('checkCount', function () {
    count -= 1;

    var nothingLeft = (count === 0);
    if (nothingLeft) {
        db.end();
    }

    var onThousandBoundary = (count % 1000 === 0);
    if (onThousandBoundary) {
        console.log(count);
    }
});
/**
 * Checks a single source row by issuing a HEAD request to its URL.
 *
 * @param {Object} source - Row with at least `id` and `sourceurl`.
 * @param {Function} callback - Queue completion callback. It is invoked
 *   exactly once on every path so the queue slot is always released.
 *
 * Responses with a status of 400 or above, and transport failures (recorded
 * with the code 999), are reported through the 'error' event. Every request
 * that was actually started emits 'checkCount' when it completes.
 */
app.on('processSource', function (source, callback) {
    // Releases the queue slot; a throwing callback is logged, not propagated.
    function finish() {
        try {
            callback();
        } catch (callbackErr) {
            console.error(callbackErr);
        }
    }

    if (!source.sourceurl) {
        logError('No url', source.id);
        // No request is started, so `count` is left alone, but the queue
        // slot must still be handed back or the queue eventually stalls.
        finish();
        return;
    }

    count++;

    var req = {
        headers: {
            'User-Agent': config.userAgent
        },
        // 30 * 60 * 1000 — presumably milliseconds (30 minutes); confirm
        // against httpreq.
        timeout: 30 * 60 * 1000,
        method: 'HEAD'
    };
    /*
    if(source.lastupdate) {
    req.headers['If-Modified-Since'] = source.lastupdate.toUTCString();
    }
    if(source.etag) {
    req.headers['If-None-Match'] = source.etag;
    }
    */

    httpreq.get(source.sourceurl, req, function (err, response) {
        if (response) {
            var statusCode = response.statusCode;
            // Local (previously an implicit global): the status rendered
            // green below 400 and red otherwise.
            var code = utils.colorOutput(statusCode, statusCode < 400 ? 'green' : 'red');
            console.log('-', code, utils.colorOutput(source.id, 'blue'), source.sourceurl);

            if (statusCode >= 400) {
                // Covers 404 as well; the status code is stored as-is.
                app.emit('error', source.id, statusCode, JSON.stringify(response));
            }
            /*
            source.statusCode = statusCode;
            if (statusCode == 200) {
            var updateSourceSQL = 'update allsources set lastupdate = now(), etag = $1, cache = $2 where id = $3';
            var query = db.query(updateSourceSQL, [response.headers.etag, response.body, source.id]);
            query.on('error', function(err){
            logError('Update Source error', err);
            });
            }
            */
        }

        if (err) {
            logError('Request Error: ' + source.id + ' - ' + source.sourceurl, err);
            app.emit('error', source.id, 999, err.toString());
        }

        app.emit('checkCount');
        finish();
    });
});
// Records a failed check for one source: increments the row's `errors`
// column and stores the supplied error code and info. A failure of the
// update itself is only logged.
app.on('error', function (id, errorcode, errorinfo) {
    var bindings = [errorcode, errorinfo, id];
    var pendingUpdate = db.query(
        'update allsources set errors = errors + 1, errorcode = $1, errorinfo = $2 where id = $3',
        bindings
    );

    pendingUpdate.on('error', function (updateErr) {
        logError('Error error', updateErr);
    });
});
/**
 * Prints one line for a source: a dash, the source id rendered through
 * utils.colorOutput with 'blue', and the source URL.
 *
 * @param {Object} source - Object with `id` and `sourceurl` properties.
 */
function log(source) {
    var coloredId = utils.colorOutput(source.id, 'blue');
    console.log('-', coloredId, source.sourceurl);
}
/**
 * Writes an error line to stderr: the message prefixed with '*** ' and
 * rendered through utils.colorOutput with 'red', followed by the error
 * value itself.
 *
 * @param {string} message - Short description of what failed.
 * @param {*} err - Error object or other detail, passed to console.error as-is.
 */
function logError(message, err) {
    var banner = utils.colorOutput('*** ' + message, 'red');
    console.error(banner, err);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment