Created
January 25, 2023 17:48
-
-
Save briri/d74404cd4fcd0b1969a3c6d0d2fc6e8a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Apache log processor that analyzes the logs to determine the number of times each path was called, what the response
# was, who called it, and whether or not it looks suspicious
#
# To run it, pull down the logs that you want to analyze (unzip them) and then combine them into a single log
# file. For example:
# > touch combined.log | |
# > cat dmptool-stg.cdlib.org-access_log-20230123 >> combined.log | |
# > cat dmptool-stg.cdlib.org-access_log-20230122 >> combined.log | |
# | |
# Then process the combined log file: | |
# > ruby apache_access_log_analizer.rb combined.log | |
# Matches the bracketed Apache timestamp together with the whitespace around it, e.g.
# " [23/Jan/2023:18:49:39 -0800] ". Any numeric UTC offset is accepted (not only "-0800") so that
# entries written during daylight saving time or on hosts in another timezone still parse.
TIMESTAMP = %r{\s+\[[0-9]{2}/[a-zA-Z]{3}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2}\s[+-][0-9]{4}\]\s+}

# Name of the report file written to the current directory.
REPORT_FILE = 'log-analysis.log'

# Decides whether a requested path looks like probing/attack traffic.
#
# @param request [String, nil] the path portion of a log entry (e.g. "/plans")
# @return [Boolean] true when the path trips one of the heuristics below
def is_suspicious(request)
  return false if request.nil? || request.empty?
  # Shibboleth login redirects carry long target URLs and are expected traffic
  return false if request.include?('Shibboleth.sso')
  # Static assets are expected traffic. Fingerprinted file names easily exceed the length limit
  # below, so any short alphanumeric extension (.js, .css, .png, .woff2, ...) is exempted.
  return false if request.start_with?('/assets/') && request.match?(/\.[a-z0-9]{2,5}\z/)

  request.length > 50 ||
    request.include?('%7') ||
    request.include?('$') ||
    request.include?('wp-') ||
    request.include?('graphql') ||
    request.include?('.env') ||
    request.match?(%r{/api/v[0-9]/users/})
end

# Breaks one raw log line into the parts the report needs.
#
# Expecting apache logs that look like this:
#   172.31.57.192 - - [23/Jan/2023:18:49:39 -0800] "GET /Shibboleth.sso/Login?target=... HTTP/1.1" 302 517
#
# @param entry [String] a single raw log line
# @return [Hash, nil] :client, :ip, :timestamp, :path and :status, or nil when the line has no
#   recognizable timestamp or no request path
def parse_entry(entry)
  match = TIMESTAMP.match(entry.to_s)
  return nil if match.nil?

  # Everything after the timestamp: "METHOD PATH PROTOCOL" STATUS BYTES
  tokens = match.post_match.split(' ')
  path = tokens[1]
  return nil if path.nil?

  before = match.pre_match
  {
    # IP address plus any identity/user info, with the "-" placeholders dropped
    client: before.split('-').map(&:strip).reject(&:empty?).join(' - '),
    # The first whitespace-delimited token, whatever its length
    ip: before.split(' ').first,
    timestamp: match[0].strip,
    path: path,
    # The second to last token is the HTTP status code
    status: tokens[-2]
  }
end

# Parses the raw log lines and aggregates them per client and per path in a single pass.
#
# @param lines [Array<String>] raw log lines
# @return [Hash] :total (number of lines), :first_seen / :last_seen (timestamps of the first and
#   last parseable entries), :clients (client => number of calls), :stats (one Hash per path,
#   sorted by status ASC then number of calls DESC) and :skipped (lines that could not be parsed)
def analyze_log(lines)
  parsed = []
  skipped = []
  lines.each do |line|
    next if line.nil? || line.strip.empty?

    record = parse_entry(line)
    if record.nil?
      skipped << line
    else
      parsed << record
    end
  end

  clients = parsed.each_with_object(Hash.new(0)) { |record, counts| counts[record[:client]] += 1 }

  stats = parsed.group_by { |record| record[:path] }.map do |path, records|
    {
      path: path,
      ips: records.map { |record| record[:ip] }.compact.uniq,
      # The status of the first occurrence is reported for the path
      status: records.first[:status],
      count: records.length,
      suspicious: is_suspicious(path)
    }
  end

  {
    total: lines.length,
    first_seen: parsed.empty? ? nil : parsed.first[:timestamp],
    last_seen: parsed.empty? ? nil : parsed.last[:timestamp],
    clients: clients,
    # The path is used as a tie-breaker so the ordering is deterministic
    stats: stats.sort_by { |stat| [stat[:status], -stat[:count], stat[:path]] },
    skipped: skipped
  }
end

# Renders the result of analyze_log as report lines (without trailing newlines).
#
# @param analysis [Hash] the Hash returned by analyze_log
# @return [Array<String>] one element per output line; '' marks a blank line
def build_report(analysis)
  lines = []
  lines << "Initial nbr of log entries: #{analysis[:total]}"
  lines << " first entry: #{analysis[:first_seen]} - last entry: #{analysis[:last_seen]}"
  lines << ''
  lines << "#{analysis[:clients].length} unique IP addresses"
  analysis[:clients].each { |client, count| lines << " #{client} - #{count} calls made" }
  lines << ''
  lines << "#{analysis[:stats].length} unique Paths (e.g. \"/plans\") called"
  lines << ''
  lines << '==================='
  lines << 'Analysis (sorted by HTTP status code ASC and number of calls DESC):'
  lines << ''
  analysis[:stats].each do |stat|
    lines << 'SUSPICIOUS!' if stat[:suspicious]
    lines << "#{stat[:ips].join(', ')} called #{stat[:count]} time(s) and responded with #{stat[:status]}"
    lines << " #{stat[:path]}"
    lines << '------------------------'
    lines << ''
  end
  lines
end

if ARGV.empty?
  puts 'Please specify a file name! Note that the file MUST reside in this directory'
else
  file_name = ARGV[0]
  puts "Processing #{file_name} ..."

  # Read the input before touching the report file so a bad file name leaves no empty report behind
  log = File.readlines(file_name)

  puts 'Parsing log entries into their important parts ...'
  puts 'This could take some time depending on the number of entries you supplied ...'
  analysis = analyze_log(log)

  # Unparseable lines are reported and skipped instead of aborting the whole run
  analysis[:skipped].each { |entry| warn "SOMETHING WENT WRONG! Cannot parse this entry: #{entry}" }

  report = build_report(analysis)
  puts report
  # The block form closes the file handle even if writing fails
  File.open(REPORT_FILE, 'w') { |file| file.puts(report) }

  puts "Output written to ./#{REPORT_FILE}"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment