Skip to content

Instantly share code, notes, and snippets.

@standa
Created January 4, 2018 14:29
Show Gist options
  • Save standa/b7d865a64e324cadaec04e0e80d8a535 to your computer and use it in GitHub Desktop.
Save standa/b7d865a64e324cadaec04e0e80d8a535 to your computer and use it in GitHub Desktop.
Apache gzip logs by domain
<?php
/**
* Process all apache *.log.gz access logs in current directory
* - split data by domain (first field in access log)
*/
// Make sure the output directory exists. The trailing is_dir() re-check covers
// the case where another process creates the directory between the two calls.
if (!is_dir('out') && !mkdir('out') && !is_dir('out')) {
    die('Could not create output directory out'.PHP_EOL);
}

// Map of sanitized domain => open gzip write handle; filled by process()
// and shared through the global scope.
$fw = [];

// glob() may return false instead of an empty array (on error, and on some
// platforms when nothing matches); normalise so natsort() always gets an array.
$files = glob('./*.gz') ?: [];

// Natural sort followed by a reverse: names with higher numbers come first,
// e.g. "access.log.10.gz" is processed before "access.log.2.gz".
// NOTE(review): presumably this yields oldest-first order for logrotate-style
// names - confirm against the actual naming scheme.
natsort($files);
$files = array_reverse($files);

foreach ($files as $file) {
    echo 'Processing '.$file.PHP_EOL;
    process($file);
}

// Close every per-domain output so the gzip streams are flushed and finalised.
// Entries that are not open handles (a failed open) are skipped.
foreach ($fw as $p) {
    if (is_resource($p)) {
        gzclose($p);
    }
}
/**
 * Split one gzip-compressed access log into per-domain gzip files under out/.
 *
 * The first space-separated field of every non-empty line is taken as the
 * domain. Characters other than word characters, dots and dashes are replaced
 * with "_" (so the domain can never contain a path separator), and the trimmed
 * line is appended to out/<domain>.log.gz.
 *
 * Output handles are cached in the global $fw array and are NOT closed here,
 * so several input files can append to the same per-domain output; the caller
 * is responsible for closing them. The global $files list is only read to
 * print the progress indicator.
 *
 * The out/ directory must already exist. The script is terminated via die()
 * when the input file is missing or when an input/output file cannot be opened.
 *
 * @param string $file Path to the gzip-compressed log file to read.
 *
 * @return void
 */
function process($file)
{
    global $fw, $files;

    if (!file_exists($file)) {
        die('File '.$file.' does not exist');
    }

    $fp = gzopen($file, 'rb');
    if (!$fp) {
        // Report the path; $fp is always false in this branch.
        die('Could not open file '.$file);
    }

    // Progress figures do not change while reading, so compute them once.
    // Tolerate $files being unset or not containing $file (direct calls).
    $total = is_array($files) ? count($files) : 0;
    $index = is_array($files) ? array_search($file, $files, true) : false;
    $position = is_int($index) ? $index + 1 : '?';

    $i = 0;
    // No length limit: a capped read would split long lines and the remainder
    // would be mistaken for a new record with a bogus domain.
    while (($line = gzgets($fp)) !== false) {
        $line = trim($line);
        if ($line === '') {
            // Skip blank lines instead of abandoning the rest of the file.
            continue;
        }

        $domain = preg_replace('/[^\w.\-]+/', '_', strtok($line, ' '));

        if (!isset($fw[$domain])) {
            echo "\nOpening 'out/$domain.log.gz'\n";
            $out = gzopen('out/'.$domain.'.log.gz', 'wb');
            if (!$out) {
                die('Could not open file out/'.$domain.'.log.gz');
            }
            $fw[$domain] = $out;
        }

        gzwrite($fw[$domain], $line.PHP_EOL);

        $i++;
        if ($i % 1000 === 0) {
            // "\r" rewrites the same console line on every update.
            echo sprintf("\rFile %s/%s Line %s", $position, $total, $i);
        }
    }

    echo PHP_EOL;
    gzclose($fp);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment