Skip to content

Instantly share code, notes, and snippets.

@benbuckman
Created November 29, 2011 17:56
Show Gist options
  • Save benbuckman/1405720 to your computer and use it in GitHub Desktop.
Save benbuckman/1405720 to your computer and use it in GitHub Desktop.
node.js script to parse Drupal logs in linux syslog (and find distinct 404'd URLs)
// drupal log parser w/ node.js
// takes a filtered syslog file
// run as `node drupal-syslog-parser.js LOGPATH`
// [install dependencies (lazy, underscore) first with `npm install lazy underscore`]
var lazy = require('lazy')
, fs = require('fs')
, path = require('path')
, _ = require('underscore');
// Resolve the log file path from the first command-line argument
// (process.argv[2], i.e. the value right after the script name).
// Exits with status 1 when the argument is missing/empty or when the
// file it names does not exist.
var logPath = null;
var logPathArg = process.argv[2];
if (typeof logPathArg !== 'undefined' && logPathArg !== "") {
  // path.existsSync was removed from Node; fs.existsSync is its replacement.
  if (! fs.existsSync(logPathArg)) {
    console.log("Can't find log file ", logPathArg);
    process.exit(1);
  }
  logPath = logPathArg;
}
if (logPath == null) {
  console.log("No log path param.");
  process.exit(1);
}
// Position -> field name for one pipe-delimited log record.
// [?] need to confirm
var mapping = {
  0: 'heading',
  1: 'timestamp',
  2: 'type',
  3: 'ip',
  4: 'url',
  5: 'referer',
  6: 'severity',
  7: 'message',
  8: 'link'
};

// Accumulates one parsed object per log line.
var logs = [];

// Counter initialised to zero (not referenced elsewhere in the visible script).
var countLines = 0;

// Module-scope scratch variables, declared without an initial value.
var lines;
var lineSplit;
var parts;
// Read the log, turn every line into an object keyed by the names in
// `mapping`, then print the distinct, sorted URLs of 'page not found' entries.
// The whole file is loaded into memory first, which is bad for memory use
// on large logs.
fs.readFile(logPath, function(err, data) {
  if (err) throw err;

  // Split on LF or CRLF and drop blank lines: a file that ends with a newline
  // would otherwise yield a phantom empty record and inflate both counts.
  var logLines = data.toString().split(/\r?\n/).filter(function(line) {
    return line !== "";
  });
  console.log("Found %d lines", logLines.length);

  logLines.forEach(function(line) {
    // Per-line locals instead of module-level scratch variables.
    var fields = line.split('|');
    var record = {};
    // Copy each positional field under its mapped name; positions the line
    // does not have are stored as null so every record has the same keys.
    _.each(mapping, function(key, pos) {
      record[key] = _.isUndefined(fields[pos]) ? null : fields[pos];
    });
    logs.push(record);
  });

  // == at this point, logs contains array of log objects ==
  console.log("Parsed %d log lines", logs.length);

  // Collect the URL of every 404 entry.
  var missingUrls = [];
  _.each(logs, function(log) {
    if (log.type === 'page not found') {
      missingUrls.push(log.url);
    }
  });

  // distinct & sort
  missingUrls = _.uniq(missingUrls);
  missingUrls.sort();

  console.log("%d distinct missing URLs\n", missingUrls.length);
  console.log(missingUrls.join("\n"));
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment