Skip to content

Instantly share code, notes, and snippets.

@walkermatt
Created February 2, 2024 10:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save walkermatt/2c706c431c40610c9da78fafebcf5fb9 to your computer and use it in GitHub Desktop.
Save walkermatt/2c706c431c40610c9da78fafebcf5fb9 to your computer and use it in GitHub Desktop.
Parse a directory of nginx access logs to get a list of referer values
import { Parser } from '@robojones/nginx-log-parser';
import fs from 'fs';
import { createReadStream } from 'fs';
import zlib from 'zlib';
import readline from 'readline';
import path from 'path';
/**
 * Lazily yields the lines of a log file, one string per line.
 *
 * Files whose extension is `.gz` are gunzipped on the fly; every other file
 * is read as plain text.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {*} [filter] - Accepted for backward compatibility; currently unused.
 * @yields {string} Each line of the file, without its line terminator.
 * @throws {Error} If the file cannot be read or decompressed.
 */
async function* processFile(filePath, filter) {
  const source = fs.createReadStream(filePath);
  let readStream = source;

  // Create a read stream based on file extension
  if (path.extname(filePath) === '.gz') {
    const gunzip = zlib.createGunzip();
    // pipe() does not forward errors from the source stream, so pass them on
    // by hand; otherwise a failed read would be an unhandled 'error' event.
    source.on('error', (err) => gunzip.destroy(err));
    readStream = source.pipe(gunzip);
  }

  const rl = readline.createInterface({
    input: readStream,
    crlfDelay: Infinity,
  });

  // Registered after createInterface() so readline's own handling of the
  // error runs first. Remembering the error and closing the interface makes
  // sure the loop below terminates and the failure is reported to the caller
  // even if readline itself does not surface input errors.
  let streamError;
  readStream.on('error', (err) => {
    streamError = err;
    rl.close();
  });

  try {
    for await (const line of rl) {
      yield line;
    }
    if (streamError !== undefined) {
      throw streamError;
    }
  } finally {
    // Release the file descriptor even when the consumer stops early or an
    // error interrupts the iteration.
    rl.close();
    readStream.destroy();
    source.destroy();
  }
}
/**
 * Reads every log file in a directory and prints each distinct referer value
 * found in them, one per line, to standard output.
 *
 * Errors raised while listing the directory or reading/parsing a file are
 * caught and reported on standard error; the function itself never rejects.
 *
 * @param {string} directoryPath - Directory containing the nginx access logs.
 * @returns {Promise<void>} Resolves once all files have been processed.
 */
async function readDirectory(directoryPath) {
  // The schema from the nginx config
  const schema =
    '$remote_addr - $remote_user [$time_local] "$request" $status $bytes_sent "$http_referer" "$http_user_agent"';
  // Create a parser that can read our log schema.
  const parser = new Parser(schema);
  try {
    const entries = await fs.promises.readdir(directoryPath, {
      withFileTypes: true,
    });
    const referers = new Set();
    for (const entry of entries) {
      // A sub-directory cannot be opened as a log file (EISDIR) and would
      // abort the whole run, so skip it.
      if (entry.isDirectory()) {
        continue;
      }
      const filePath = path.join(directoryPath, entry.name);
      for await (const line of processFile(filePath)) {
        const { http_referer: referer } = parser.parseLine(line);
        // '-' is the placeholder logged when the request carried no referer.
        // Non-string values are ignored so a line lacking the field does not
        // end up printed as "undefined".
        if (typeof referer === 'string' && referer !== '-') {
          referers.add(referer);
        }
      }
    }
    for (const referer of referers) {
      console.log(referer);
    }
  } catch (error) {
    console.error('Error reading directory:', error);
  }
}
// Start reading the directory. The path may be supplied as the first
// command-line argument; without one, the original default location is used.
readDirectory(process.argv[2] || '/tmp/access_logs/');
{
"name": "parse-nginx-logs",
"version": "1.0.0",
"description": "Parse a directory of nginx access logs to get a list of referer values",
"type": "module",
"main": "main.js",
"scripts": {},
"keywords": [],
"author": "Matt Walker (http://longwayaround.org.uk)",
"license": "ISC",
"dependencies": {
"@robojones/nginx-log-parser": "^0.0.6"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment