Skip to content

Instantly share code, notes, and snippets.

@dannysauer
Created January 31, 2022 06:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dannysauer/94e8df84d25d78806aef313b58ffae46 to your computer and use it in GitHub Desktop.
Save dannysauer/94e8df84d25d78806aef313b58ffae46 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use warnings;
use strict;
use Carp;
use DateTime;
use Date::Parse qw( str2time );
use JSON qw( decode_json );
# legacy ingress log examples:
# 198.53.182.251 - - [02/Dec/2021:01:15:03 +0000] "GET /insomnia-ubuntu/dists/default/Release.gpg HTTP/1.1" 404 14 "-" "Debian APT-HTTP/1.3 (2.0.6)" 127 0.054 [pulp-pulp-content-24816] [] 10.17.31.6:24816 14 0.052 404 eba226d18193c0907e4f76746410a7a7
# 183.3.220.130 - - [01/Dec/2021:09:25:40 +0000] "GET /favicon.ico HTTP/2.0" 404 14 "https://download.konghq.com/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36" 130 0.006 [pulp-pulp-content-24816] [] 10.17.13.2:24816 14 0.004 404 7260c85a9fb316187c5fa1ef0a8428c7
# 54.169.106.92 - - [25/Nov/2021:05:46:15 +0000] "GET / HTTP/1.1" 200 1168 "() { ignored; }; echo Content-Type: text/html; echo ; /bin/cat /etc/passwd" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36" 494 0.009 [pulp-pulp-content-24816] [] 10.17.31.6:24816 12555 0.008 200 c9b6ff27f6ed78f88a8ce5b68b0dc6f1
#
#
# JSON ingress log example:
# root@danny-ubuntu:/tmp# zcat ingress.gz | head -n1 | jq
#{
# "ts": "2022-01-31T04:10:37+00:00",
# "tsNs": "1643602237.917",
# "level": "info",
# "msg": "302 -> GET download.konghq.com/gateway-0.x-centos-7/Packages/k/kong-0.14.1.el7.noarch.rpm",
# "service": "nginx-ingress",
# "duration": "0.368 s",
# "totalBytes": "10",
# "traceID": "",
# "token": "no_token",
# "clientIP": "74.11.33.194",
# "request": {
# "method": "GET",
# "requestSize": "416",
# "responseSize": "10",
# "userAgent": "Chef Client/14.5.33 (ruby-2.5.1-p57; ohai-14.5.4; x86_64-linux; +https://chef.io)",
# "remoteIp": "74.11.33.194",
# "referer": "",
# "protocol": "HTTP/1.1"
# },
# "status": "302",
# "uri": "download.konghq.com/gateway-0.x-centos-7/Packages/k/kong-0.14.1.el7.noarch.rpm",
# "extra": {
# "proxyUpstreamName": "pulp-pulp-content-24816",
# "proxyAlternativeUpstreamName": "",
# "upstreamStatus": "302",
# "upstreamAddr": "10.17.21.33:24816"
# }
#}
# emit_log( $timestamp, $ip, $status, $useragent, $method, $file )
#
# Print one simulated S3-style access-log record to STDOUT for a single
# request taken from an ingress log.
#
#   $timestamp  request time, in any format Date::Parse::str2time accepts
#   $ip         client address
#   $status     HTTP status code
#   $useragent  client User-Agent string
#   $method     HTTP request method
#   $file       request path, starting with '/'
#
# Requests for the bare root ('/') are dropped on purpose.  Records with no
# path or with a timestamp that cannot be parsed are reported via carp and
# skipped, so one bad line does not abort the whole run.  Other missing
# fields are written as "-".
sub emit_log {
    my ( $timestamp, $ip, $status, $useragent, $method, $file ) = @_;

    if ( !defined $file ) {
        carp 'skipping record without a request path';
        return;
    }
    return if $file eq '/';

    # str2time() returns undef when it cannot parse its argument, and
    # DateTime->from_epoch() croaks on an undefined epoch.
    my $epoch = defined $timestamp ? str2time($timestamp) : undef;
    if ( !defined $epoch ) {
        carp 'skipping record with unparsable timestamp: '
            . ( defined $timestamp ? "'$timestamp'" : '(undef)' );
        return;
    }

    # Avoid "uninitialized value" warnings and empty columns in the output.
    for my $field ( $ip, $status, $useragent, $method ) {
        $field = '-' if !defined $field;
    }

    # calculate a couple of fields
    my $dt = DateTime->from_epoch( epoch => $epoch, time_zone => 'UTC' );
    my $ts = $dt->strftime("[%d/%b/%Y:%H:%M:%S %z]");

    # The artifact path is the request path with every '/' replaced by '__'.
    my $simpath = $file;
    $simpath =~ s|/|__|g;

    # Everything after the last '/' (empty when the path ends in '/').
    my $filename = substr( $file, 1 + rindex( $file, '/' ) );

    my $reqURI = "$method /pulp3-media/simulated/someobject?response-content-disposition=attachment;x-pulp-artifact-path=${simpath};filename=${filename}&X-fake-parameters=yessir HTTP/1.1";

    # the S3 format
    print qq{owner kong-cloud-01-prod-us-east-2-kong-packages-origin $ts $ip - requestor REST.GET.OBJECT key "$reqURI" $status "-" "-" 1 1 1 "referer" "$useragent" "-" hostid SigV2 - - host.header - arn \n};
    return;
}
# Pattern for one legacy ingress log line (see the examples near the top of
# the file).  The address must be a full dotted quad; the looser
# "(\d{1,3}\.?){4}" also accepted bare digit runs such as "1234".
my $legacy_line_re = qr/
    (?P<ip>(?:\d{1,3}\.){3}\d{1,3})\s+
    (?P<identity>\S+)\s+
    (?P<userid>\S+)\s+
    \[(?P<time>.*?)\]\s+
    "(?P<request>
        (?P<request_method>\S+)\s+
        (?P<request_path>\S+)\s*
        .*?
    )"\s+
    (?P<status>\d+)\s+
    (?P<size>\d+|"-")\s+
    "(?P<referer>.*?)"\s+
    "(?P<useragent>.*?)"\s+
    (?P<body_bytes>\d+)\s+
    (?P<response_time>\d+(?:\.\d+)?)\s+
    (?P<rest_of_line>.*)\s*
    $
/x;

# Read log lines from the files named on the command line (or STDIN), accept
# either the legacy text format or the JSON format, and hand the interesting
# fields to emit_log().  Lines in neither format are reported and skipped.
LINE:
while ( my $line = <> ) {
    next LINE if $line !~ /\S/;    # blank lines are not worth a warning

    if ( $line =~ $legacy_line_re ) {
        emit_log( @+{ 'time', 'ip', 'status', 'useragent', 'request_method', 'request_path' } );
        next LINE;
    }

    # decode_json() croaks on malformed input; trap that so a line that is
    # neither format is reported instead of killing the script.
    my $decoded = eval { decode_json($line) };

    # Only a JSON object can carry the fields we need.
    if ( ref $decoded eq 'HASH' ) {
        my $request = ref $decoded->{request} eq 'HASH' ? $decoded->{request} : {};
        my $uri     = $decoded->{uri};

        if ( defined $uri && defined $decoded->{ts} ) {
            # JSON logs use "hostname/path" in URI; keep only the path.
            # index() returns -1 when there is no '/', which substr() would
            # take as "last character", so treat that case as the root.
            my $slash = index( $uri, '/' );
            my $path  = $slash >= 0 ? substr( $uri, $slash ) : '/';

            emit_log(
                @{$decoded}{ 'ts', 'clientIP', 'status' },
                $request->{userAgent},
                $request->{method},
                $path,
            );
            next LINE;
        }
    }

    chomp $line;
    carp "skipping unrecognized log line: $line";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment