Skip to content

Instantly share code, notes, and snippets.

@mttjohnson
Last active November 10, 2021 00:00
Show Gist options
  • Save mttjohnson/a989f43f9879a96d8b2e556107405e46 to your computer and use it in GitHub Desktop.
Save mttjohnson/a989f43f9879a96d8b2e556107405e46 to your computer and use it in GitHub Desktop.
Parsing nginx web request access logs
# Yesterday's and today's access log
# Filter to only show place order actions submitting payment requests that fail (400) or not
# get a count of the number of events in the logs
#
# The rotated logs are streamed with `find -exec cat {} +` instead of
# `cat $(find ... -print0)`: command substitution discards NUL bytes, so as
# soon as more than one rotated log matched, all paths were glued together
# into a single non-existent filename.
# Slashes are left unescaped in the grep patterns because a backslash before
# an ordinary character is unspecified in POSIX ERE and newer GNU grep warns.

# Count of guest-cart payment-information POSTs answered with HTTP 400
{
  find . -regex '.*/www-prod_backend-access\.log-[0-9]+' -exec cat {} +
  cat www-prod_backend-access.log
} \
| grep -Ec 'POST /rest/[A-Za-z0-9_]+/V1/guest-carts/[A-Za-z0-9]*/payment-information HTTP/[1-2]\.[0-1]" 400'

# Count of the same requests answered with another x00 status (e.g. 200, 500).
# NOTE: the pattern only matches statuses ending in "00"; codes such as 404
# or 503 are not counted by either command.
{
  find . -regex '.*/www-prod_backend-access\.log-[0-9]+' -exec cat {} +
  cat www-prod_backend-access.log
} \
| grep -Ec 'POST /rest/[A-Za-z0-9_]+/V1/guest-carts/[A-Za-z0-9]*/payment-information HTTP/[1-2]\.[0-1]" [^4]00'
# Extract a list of IPs from the failed requests
# Rotated logs are streamed via `find -exec cat {} +`; `cat $(find ... -print0)`
# loses the NUL separators and breaks when more than one rotated log matches.
# The perl substitution keeps the dotted quad found in a quoted
# "<ip>, 127.0.0.1" field (presumably X-Forwarded-For - confirm against the
# nginx log_format). Dots in 127.0.0.1 are escaped so they match literally.
# Lines that do not contain that field pass through unchanged (perl -p).
{
  find . -regex '.*/www-prod_backend-access\.log-[0-9]+' -exec cat {} +
  cat www-prod_backend-access.log
} \
| grep -E 'POST /rest/[A-Za-z0-9_]+/V1/guest-carts/[A-Za-z0-9]*/payment-information HTTP/[1-2]\.[0-1]" 400' \
| perl -pe 's/.+"(\d+\.\d+\.\d+\.\d+), 127\.0\.0\.1"/$1/' \
| sort | uniq -c
# Extract a list of dates from the failed requests
# Reads the gzip-compressed rotated logs for August 2021, keeps the HTTP 400
# payment-information POSTs, and reduces each line to the part of the
# bracketed timestamp before its first colon, then counts lines per value.
# Slashes are left unescaped in the grep pattern: a backslash before an
# ordinary character is unspecified in POSIX ERE and newer GNU grep warns.
zcat www-prod_backend-access.log-202108*.gz \
| grep -E 'POST /rest/[A-Za-z0-9_]+/V1/guest-carts/[A-Za-z0-9]*/payment-information HTTP/[1-2]\.[0-1]" 400' \
| perl -pe 's/\d+\.\d+\.\d+\.\d+.+?\[(.+?)\:.+\].+/$1/' \
| sort | uniq -c
# Tally every request by its first space-delimited field (the client
# address column) and list the busiest addresses first
cut -d ' ' -f 1 < access.log \
| sort \
| uniq -c \
| sort -rn
# Per-IP request counts, restricted to savePayment POST lines that end in
# the "-" "Mozilla/5.0" "-" triple
grep -Ei 'POST /checkout/onepage/savePayment/.+ "-" "Mozilla/5.0" "-"' \
  /var/log/nginx/www.example.com-access.log-20190313 \
| cut -d ' ' -f 1 \
| sort \
| uniq -c
# Write the de-duplicated, sorted set of IPs behind the suspicious
# savePayment POSTs to bad_payment_ips.txt (one IP per line)
grep -Ei 'POST /checkout/onepage/savePayment/.+ "-" "Mozilla/5.0" "-"' \
  /var/log/nginx/www.example.com-access.log-20190313 \
| cut -d ' ' -f 1 \
| sort -u > bad_payment_ips.txt
# Create a list of all requests from bad IPs
# One awk pass: the first file loads the bad IPs into a lookup table, the
# second file is the access log, and a line is kept only when its first
# field (the column the bad IP list was cut from) is in that table.
# This replaces a per-IP `grep ${line}` loop that treated each IP as an
# unquoted regex (so 1.2.3.4 also matched 11.2.3.45 or 1x2y3z4), rescanned
# the whole log once per IP, and could emit the same line several times.
# Output is appended in log order; an empty bad IP list yields no output.
awk 'NR == FNR { bad[$1]; next } $1 in bad' \
  bad_payment_ips.txt \
  /var/log/nginx/www.example.com-access.log-20190313 \
  >> bad_ips_requests.txt
# Number of request lines collected in bad_ips_requests.txt
wc -l < bad_ips_requests.txt
# Show what else the bad IPs requested: drop the savePayment and saveOrder
# POSTs (case-insensitive) and print every remaining line.
# NOTE(review): this reads ~/bad_ips_requests.txt, whereas the snippet that
# builds the list writes bad_ips_requests.txt relative to the current
# directory - confirm both refer to the same file.
grep -Eiv \
  -e 'POST /checkout/onepage/savePayment/' \
  -e 'POST /checkout/onepage/saveOrder/form_key/' \
  ~/bad_ips_requests.txt
# Tally the distinct values of the _kx query string parameter, most
# frequent first. The value runs from "_kx=" up to the next quote,
# ampersand or space.
grep -Ei '_kx' access.log \
| perl -pe 's/.+?_kx=(.+?)["& ].+/$1/' \
| sort \
| uniq -c \
| sort -rn
# Requests per minute for lines mentioning _kx: each line is reduced to its
# bracketed timestamp truncated after the minutes, only 01/Oct/2021 hours
# 15, 16 and 17 are kept, and identical minute stamps are counted.
grep -Ei '_kx' access.log \
| perl -pe 's/\d+\.\d+\.\d+\.\d+.+?\[(.+?\:\d+\:\d+)\:\d+.+\].+/$1/' \
| grep -Ei '01/Oct/2021:1[567]' \
| sort \
| uniq -c
# Complex parsing of log files with Perl
# Nginx Access Log RegEx
# (?<remote_addr>[^\s]+)\s[^\s]+\s[^\s]+\s\[(?<time_local>.+?\:.+?)\]\s"(?<method>\w+)\s(?<url_path>[\w\/\=\%\&\_\-\.\+\[\]\(\)\!\,]+)(?<url_query_string>\??[\w\/\=\%\&\_\-\.\+\[\]\(\)\!\,]*?)\s(?<protocol>[\w\/\.]+?)"\s(?<status>\d+)\s(?<body_bytes_sent>[\d\.]+)\s(?<request_time>[\d\.]+)\s.+
# Labels for Match Groups
# remote_addr: $1 $+{remote_addr}
# time_local: $2 $+{time_local}
# method: $3 $+{method}
# url_path: $4 $+{url_path}
# url_query_string: $5 $+{url_query_string}
# protocol: $6 $+{protocol}
# status: $7 $+{status}
# body_bytes_sent: $8 $+{body_bytes_sent}
# request_time: $9 $+{request_time}
#
# Template: every field is copied into a named variable so the filter at the
# bottom can be edited freely. As written it prints the lines logged during
# 24/Aug/2021 10:00 whose path contains neither /static nor /media.
# The log file is passed to perl directly instead of being piped through cat.
perl -lne '
  # Skip lines that do not fit the expected format, so that captures left
  # over from an earlier matching line can never be tested by the filter.
  m/(?<remote_addr>[^\s]+)\s[^\s]+\s[^\s]+\s\[(?<time_local>.+?\:.+?)\]\s"(?<method>\w+)\s(?<url_path>[\w\/\=\%\&\_\-\.\+\[\]\(\)\!\,]+)(?<url_query_string>\??[\w\/\=\%\&\_\-\.\+\[\]\(\)\!\,]*?)\s(?<protocol>[\w\/\.]+?)"\s(?<status>\d+)\s(?<body_bytes_sent>[\d\.]+)\s(?<request_time>[\d\.]+)\s.+/
    or next;
  $remote_addr = $+{remote_addr};
  $time_local = $+{time_local};
  $method = $+{method};
  $url_path = $+{url_path};
  $url_query_string = $+{url_query_string};
  $protocol = $+{protocol};
  $status = $+{status};
  $body_bytes_sent = $+{body_bytes_sent};
  $request_time = $+{request_time};
  print $_
    if (
      $time_local =~ m{24/Aug/2021:10:00}
      and not $url_path =~ m{/static}
      and not $url_path =~ m{/media}
    )
' access.log
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment