Created
March 21, 2013 09:54
-
-
Save russellbeattie/5211900 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var utils = require('../utils.js'), | |
pg = require('pg').native, | |
http = require('http'), | |
httpreq = require('./httpreq.js'), | |
async = require('async'), | |
events = require('events'), | |
lodash = require('lodash'); | |
// Script-wide setup: event bus, configuration, database client and counters.

// Internal event bus; the handlers below subscribe to 'processSource',
// 'checkCount' and 'error' on it.
var app = new events.EventEmitter();
var config = require('../config.js');

// Connection defaults for the pg client, taken from the project config.
pg.defaults.user = config.db.user;
pg.defaults.database = config.db.database;
pg.defaults.host = config.db.host;
pg.defaults.poolSize = 10;
pg.defaults.rows = 1;

// Raise the per-host socket cap of the shared HTTP agent.
http.globalAgent.maxSockets = 1000;

var db = new pg.Client();
db.connect();

// Dumped with console.dir when the process exits.
var keys = [];
// Number of source checks currently in flight. Declared with `var` so it is
// module-scoped instead of leaking as an implicit global (which would also be
// a ReferenceError under strict mode).
var count = 0;
// Start timestamp, used by the exit handler to report elapsed time.
var start = new Date();
console.log('Starting');
// Select the sources to check. By default every source whose last recorded
// errorcode is not 404, ordered by lastupdate; when an id is given as the
// first command-line argument, only that source.
var idsSQL = 'select id, sourceurl from allsources where errorcode != 404 order by lastupdate';
var idsParams = [];
if (process.argv.length > 2) {
    // Bind the id as a query parameter instead of concatenating raw
    // command-line text into the SQL statement (avoids SQL injection).
    idsSQL = 'select id, sourceurl from allsources where id = $1';
    idsParams = [process.argv[2]];
}

// Work queue: each row is handed to the 'processSource' handler, with at most
// 400 rows being processed concurrently. The handler must invoke `callback`
// to free its slot.
var q = async.queue(function (row, callback) {
    app.emit('processSource', row, callback);
}, 400);

// Stream the result rows into the queue as they arrive.
var query = db.query(idsSQL, idsParams);
query.on('row', function (row) {
    //app.emit('processSource', row);
    q.push(row, function () {
        //console.log(row);
    });
});
// On process exit, report how long the run took (in seconds) and dump `keys`.
process.on('exit', function () {
    var end = new Date();
    // Subtracting two Dates yields milliseconds; divide by 1000 for seconds
    // (the previous divisor of 10000 under-reported the time tenfold).
    console.log('Elapsed time: ' + ((end - start) / 1000));
    console.dir(keys);
});
// 'checkCount' handler: one in-flight source check has finished. Decrements
// the in-flight counter, closes the database connection once it reaches zero,
// and prints the counter whenever it is a multiple of 1000.
// NOTE(review): the counter can also touch zero while rows are still being
// streamed in, which would end the connection early - confirm against usage.
app.on('checkCount', function onCheckCount() {
    count -= 1;
    var remaining = count;

    if (remaining === 0) {
        db.end();
    }

    if (remaining % 1000 === 0) {
        console.log(remaining);
    }
});
/**
 * 'processSource' handler: sends a HEAD request to one source's URL, logs the
 * status code, and emits 'error' for HTTP status >= 400 or a request failure.
 *
 * @param {Object} source - Row with at least `id` and `sourceurl`.
 * @param {Function} callback - Queue completion callback; it is invoked
 *   exactly once on every path so the queue worker slot is always released.
 */
app.on('processSource', function (source, callback) {
    if (!source.sourceurl) {
        logError('No url', source.id);
        // Without this call the queue worker is never released, and the
        // queue stalls once enough URL-less rows have been seen.
        callback();
        return;
    }

    // Balanced by the 'checkCount' emit in the response callback below.
    count++;

    var req = {
        headers: {
            'User-Agent': config.userAgent
        },
        timeout: 30 * 60 * 1000,
        method: 'HEAD'
    };

    // Conditional-request headers are currently disabled:
    /*
    if(source.lastupdate) {
    req.headers['If-Modified-Since'] = source.lastupdate.toUTCString();
    }
    if(source.etag) {
    req.headers['If-None-Match'] = source.etag;
    }
    */

    httpreq.get(source.sourceurl, req, function (err, response) {
        if (response) {
            var statusCode = response.statusCode;
            // Local variable (previously an implicit global): the status
            // code coloured green below 400, red otherwise.
            var code = utils.colorOutput(statusCode, statusCode < 400 ? 'green' : 'red');
            console.log('-', code, utils.colorOutput(source.id, 'blue'), source.sourceurl);

            if (statusCode >= 400) {
                // Covers 404 as well; the former separate 404 branch emitted
                // exactly the same event.
                // NOTE(review): JSON.stringify throws on circular objects -
                // confirm the shape of httpreq's response.
                app.emit('error', source.id, statusCode, JSON.stringify(response));
            }

            // Persisting successful responses is currently disabled:
            /*
            source.statusCode = statusCode;
            if (statusCode == 200) {
            var updateSourceSQL = 'update allsources set lastupdate = now(), etag = $1, cache = $2 where id = $3';
            var query = db.query(updateSourceSQL, [response.headers.etag, response.body, source.id]);
            query.on('error', function(err){
            logError('Update Source error', err);
            });
            }
            */
        }

        if (err) {
            logError('Request Error: ' + source.id + ' - ' + source.sourceurl, err);
            // 999 is the errorcode this script records for request failures.
            app.emit('error', source.id, 999, err.toString());
        }

        app.emit('checkCount');

        try {
            callback();
        } catch (callbackErr) {
            console.error(callbackErr);
        }
    });
});
// 'error' handler: records a failure against a source row by incrementing its
// `errors` column and storing the error code and info. A failure of the
// update query itself is only logged.
app.on('error', function onSourceError(id, errorcode, errorinfo) {
    var params = [errorcode, errorinfo, id];
    var update = db.query(
        'update allsources set errors = errors + 1, errorcode = $1, errorinfo = $2 where id = $3',
        params
    );

    update.on('error', function (queryErr) {
        logError('Error error', queryErr);
    });
});
/**
 * Print a one-line summary of a source: a dash, its id coloured blue, and
 * its URL.
 *
 * @param {Object} source - Object with `id` and `sourceurl` properties.
 */
function log(source) {
    var coloredId = utils.colorOutput(source.id, 'blue');
    console.log('-', coloredId, source.sourceurl);
}
/**
 * Write an error to stderr: the message prefixed with '*** ' and coloured
 * red, followed by the error value itself.
 *
 * @param {string} message - Short description of what failed.
 * @param {*} err - Error (or other detail) printed after the message.
 */
function logError(message, err) {
    var banner = utils.colorOutput('*** ' + message, 'red');
    console.error(banner, err);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment