Skip to content

Instantly share code, notes, and snippets.

@mattcolf
Last active January 31, 2019 17:59
Show Gist options
  • Save mattcolf/64f8132316a58a6e8c49126d7b910a6b to your computer and use it in GitHub Desktop.
Save mattcolf/64f8132316a58a6e8c49126d7b910a6b to your computer and use it in GitHub Desktop.
Quick and dirty script for collating and filtering IIS logs from multiple servers.
<?php
// manually set any config in this file, then run as follows
// usage: php merge.php
#
# CONFIG
#
// this (quick and dirty) script aggregates log entries from multiple locations, sorts them, and dumps them out
// useful for getting logs from multiple locations (or servers) between certain periods of time
// time window to extract; set either bound to null to leave that side open
$date_min = new DateTime('2019-01-29 18:00:00 EST');
$date_max = new DateTime('2019-01-30 06:00:00 EST');
// line predicate: receives one raw log line and returns false to drop it, true to keep it
// (commented and empty lines are already dropped before this is called)
$filter = function ($line) {
    // health check traffic is noise, so it is the one thing rejected here
    $isHealthCheck = stripos($line, 'ELB-HealthChecker/2.0') !== false;
    return !$isHealthCheck;
};
#
# SCRIPT START
#
// print every matching log line, in date order, from the directories below (typically one per server)
$merged = allLines(['39', '40', '41'], $date_min, $date_max, $filter);
foreach ($merged as $line) {
    echo $line;
}
// get all matching lines from all directories, merged into a single stream ordered by date
//
// one line is buffered per directory and the buffered line with the earliest timestamp is
// yielded next, so the output is only ordered if each directory's own lines already are
//
// $paths    directories to read, each handed to serverGenerator()
// $date_min earliest timestamp to keep, or null for no lower bound
// $date_max latest timestamp to keep, or null for no upper bound
// $filter   called with each raw line, return false to drop it; null disables filtering
//
// yields the raw text of every kept line; terminates the script (die) on a line whose
// leading timestamp cannot be parsed
function allLines(array $paths, DateTime $date_min = null, DateTime $date_max = null, callable $filter = null) {
    $gens = [];
    $lines = [];
    foreach ($paths as $path) {
        $gens[] = serverGenerator($path);
        $lines[] = null;
    }

    // number of slots in $lines currently holding a buffered [date, line] pair
    $pending = 0;

    // keep going while a source can still produce lines OR a buffered line is still waiting;
    // checking only the generators would drop lines buffered from sources that just ran dry
    while ($pending > 0 || atLeastOneValid($gens)) {
        // top up every empty slot from its own source
        foreach ($gens as $i => $gen) {
            if ($lines[$i] === null) {
                $lines[$i] = nextMatchingLine($gen, $date_min, $date_max, $filter);
                if ($lines[$i] !== null) {
                    $pending++;
                }
            }
        }

        // find the slot with the lowest date; on a tie the earliest path wins
        $pos = null;
        foreach ($lines as $i => $pick) {
            if ($pick !== null && ($pos === null || $pick[0] < $lines[$pos][0])) {
                $pos = $i;
            }
        }

        // every remaining line was skipped or filtered out, nothing left to emit
        if ($pos === null) {
            break;
        }

        // release the slot before yielding so it is refilled on the next pass
        $line = $lines[$pos][1];
        $lines[$pos] = null;
        $pending--;

        yield $line;
    }
}

// advance one source until it produces a line that should be kept
// returns [DateTime, string] for that line, or null once the source is exhausted
function nextMatchingLine($gen, DateTime $date_min = null, DateTime $date_max = null, callable $filter = null) {
    while ($gen->valid()) {
        // grab the next line
        $line = $gen->current();
        $gen->next();

        // ignore empty and whitespace-only lines (a blank line still carries its newline)
        if (empty($line) || trim($line) === '') {
            continue;
        }

        // ignore commented lines
        if ($line[0] === '#') {
            continue;
        }

        // logical && (not bitwise &) so that a null filter is never invoked
        if ($filter !== null && $filter($line) === false) {
            continue;
        }

        // grab the date; the fraction is capped at 6 digits because that is all 'u' can parse
        if (1 !== preg_match('#^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3,6}#', $line, $matches)) {
            die("Unable to parse date from string '$line'.");
        }

        // parse the date, assume EST
        $date = DateTime::createFromFormat('Y-m-d H:i:s.u e', sprintf('%s EST', $matches[0]));
        if ($date === false) {
            die("Unable to parse date from string '$line'.");
        }

        // skip (rather than emit) anything outside the requested window
        if (($date_min !== null && $date < $date_min) || ($date_max !== null && $date > $date_max)) {
            continue;
        }

        return [$date, $line];
    }

    return null;
}
// report whether any generator in the list can still produce a value
// stops at the first valid one, so later generators are not touched
function atLeastOneValid(array $gens) {
    $candidates = array_values($gens);
    $total = count($candidates);
    $index = 0;
    while ($index < $total) {
        if ($candidates[$index]->valid()) {
            return true;
        }
        $index++;
    }
    return false;
}
// produce a generator that returns lines from the log files in a certain directory
// assumes that the log files, when sorted by name, are in chronological order
//
// $path  directory to read; only regular files directly inside it are used
//
// yields each line exactly as SplFileObject::fgets() returns it (line ending included)
function serverGenerator($path) {
    $directory = new DirectoryIterator($path);
    $pathnames = [];

    // collect names only; files are opened one at a time further down, so a directory
    // with many logs does not hold one open handle per file for the whole run
    foreach ($directory as $file) {
        if ($file->isFile()) {
            $pathnames[$file->getFilename()] = $file->getPathname();
        }
    }

    // need to sort manually as DirectoryIterator will not iterate in any defined order
    ksort($pathnames);

    // run through each file in order, yielding one line at a time
    foreach ($pathnames as $pathname) {
        $handle = new SplFileObject($pathname);
        while (!$handle->eof()) {
            yield $handle->fgets();
        }
        // drop the reference so the file is closed before the next one is opened
        $handle = null;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment