public
Created

Node.js script to parse Drupal logs in the Linux syslog (and find distinct 404'd URLs)

  • Download Gist
drupal-syslog-parser.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
// drupal log parser w/ node.js
// takes a filtered syslog file
// run as `node drupal-syslog-parser.js LOGPATH`
 
// [install dependencies (lazy,underscore) first with `npm install ___`]
var lazy = require('lazy')
, fs = require('fs')
, path = require('path')
, _ = require('underscore');
 
// Path of the log file to parse; stays null until a usable CLI argument is found.
var logPath = null;

// process.argv is [node, script, LOGPATH, ...]; only the first user argument is read.
var logPathArg = process.argv[2];

if (logPathArg !== undefined && logPathArg !== "") {
  // fs.existsSync replaces path.existsSync, which is no longer provided by
  // Node's `path` module and would throw "not a function" here.
  if (!fs.existsSync(logPathArg)) {
    console.log("Can't find log file ", logPathArg);
    process.exit(1);
  }
  logPath = logPathArg;
}

// A missing or empty argument leaves logPath unset: report it and stop.
if (logPath === null) {
  console.log("No log path param.");
  process.exit(1);
}
 
// Positional field names for one '|'-delimited Drupal syslog entry.
var mapping = { 0:'heading', 1:'timestamp', 2:'type', 3:'ip', 4:'url', 5:'referer', 6:'severity', 7:'message', 8:'link'} // [?] need to confirm
, logs = []
;

/**
 * Split one '|'-delimited log line into an object keyed by the names in `mapping`.
 * Positions that are absent from the line are stored as null.
 *
 * @param {string} line - a single raw log line
 * @returns {Object} one property per entry in `mapping`
 */
function parseLogLine(line) {
  var fields = line.split('|');
  var parts = {};
  _.each(mapping, function(key, ind) {
    parts[key] = _.isUndefined(fields[ind]) ? null : fields[ind];
  });
  return parts;
}

/**
 * Collect the URLs of every entry whose type is 'page not found'.
 *
 * @param {Array<Object>} entries - parsed log objects (see parseLogLine)
 * @returns {Array} distinct URLs, sorted with the default Array sort order
 */
function findMissingUrls(entries) {
  var urls = [];
  _.each(entries, function(log) {
    if (log.type === 'page not found') {
      urls.push(log.url);
    }
  });
  urls = _.uniq(urls);
  urls.sort();
  return urls;
}

// loads the whole file first, bad for memory, but unclear how else
fs.readFile(logPath, function(err, data) {
  if (err) throw err;

  // Accept both LF and CRLF line endings, and drop empty lines so the
  // trailing newline of the file is not counted or parsed as a log entry.
  var lines = data.toString().split(/\r?\n/).filter(function(line) {
    return line !== "";
  });
  console.log("Found %d lines", lines.length);

  lines.forEach(function(line) {
    logs.push(parseLogLine(line));
  });
  // == at this point, logs contains array of log objects ==
  console.log("Parsed %d log lines", logs.length);

  // find the unique URLs in 404s (distinct & sorted)
  var missingUrls = findMissingUrls(logs);
  console.log("%d distinct missing URLs\n", missingUrls.length);
  console.log(missingUrls.join("\n"));
});

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.