Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

node.js script to parse Drupal logs in linux syslog (and find distinct 404'd URLs)

View drupal-syslog-parser.js
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
// drupal log parser w/ node.js
// takes a filtered syslog file
// run as `node drupal-syslog-parser.js LOGPATH`
 
// [install dependencies (lazy,underscore) first with `npm install ___`]
var lazy = require('lazy')
, fs = require('fs')
, path = require('path')
, _ = require('underscore');
 
// Path of the syslog file to parse, taken from the first command-line
// argument. The script exits with status 1 when the argument is missing,
// empty, or does not point at an existing file.
var logPath = null;

// argv[0] is the node binary and argv[1] is this script, so the first
// user-supplied argument is at index 2.
var logPathArg = process.argv[2];

if (logPathArg !== undefined && logPathArg !== "") {
  // fs.existsSync is used because path.existsSync no longer exists in
  // current Node releases and would throw a TypeError here.
  if (!fs.existsSync(logPathArg)) {
    console.log("Can't find log file ", logPathArg);
    process.exit(1);
  }
  logPath = logPathArg;
}

if (logPath === null) {
  console.log("No log path param.");
  process.exit(1);
}
 
// Maps each '|'-separated field position to the property name used on a
// parsed log object. Only `type` and `url` are read by the report below.
// NOTE(review): the field order was never confirmed by the original author;
// the trailing fields in particular may differ from the site's configured
// syslog format — verify before relying on anything other than type/url.
var mapping = { 0:'heading', 1:'timestamp', 2:'type', 3:'ip', 4:'url', 5:'referer', 6:'severity', 7:'message', 8:'link'};

// Every parsed log entry, in file order.
var logs = [];

// Converts one raw log line into an object keyed by the names in `mapping`.
// Fields that are absent from the line are set to null.
function parseLogLine(line) {
  var fields = line.split('|');
  var parsed = {};
  _.each(mapping, function (key, position) {
    parsed[key] = _.isUndefined(fields[position]) ? null : fields[position];
  });
  return parsed;
}

// Reads the whole file into memory before parsing. That is simple but costly
// for very large logs; a line-by-line stream would avoid it.
fs.readFile(logPath, function (err, data) {
  if (err) throw err;

  // Accept both LF and CRLF endings, and drop blank lines so the trailing
  // newline at end-of-file is not counted or parsed as an empty log entry.
  var lines = _.filter(data.toString().split(/\r?\n/), function (line) {
    return line.trim() !== '';
  });
  console.log("Found %d lines", lines.length);

  lines.forEach(function (line) {
    logs.push(parseLogLine(line));
  });

  // == at this point, logs contains array of log objects ==
  console.log("Parsed %d log lines", logs.length);

  // Collect the URL of every 404 entry; entries too short to carry a URL
  // field are skipped rather than contributing a null.
  var missingUrls = [];
  _.each(logs, function (log) {
    if (log.type === 'page not found' && log.url !== null) {
      missingUrls.push(log.url);
    }
  });

  // distinct & sort
  missingUrls = _.uniq(missingUrls);
  missingUrls.sort();

  console.log("%d distinct missing URLs\n", missingUrls.length);
  console.log(missingUrls.join("\n"));
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.