Skip to content

Instantly share code, notes, and snippets.

@briri
Created January 25, 2023 17:48
Show Gist options
  • Save briri/d74404cd4fcd0b1969a3c6d0d2fc6e8a to your computer and use it in GitHub Desktop.
Save briri/d74404cd4fcd0b1969a3c6d0d2fc6e8a to your computer and use it in GitHub Desktop.
# Apache log processor that analyzes the logs to determine the number of times each path was called, what the
# response was, who called it, and whether or not it looks suspicious.
#
# To run it, pull down the logs that you want to analyze (unzip them) and then combine them into a single log
# file. For example:
# > touch combined.log
# > cat dmptool-stg.cdlib.org-access_log-20230123 >> combined.log
# > cat dmptool-stg.cdlib.org-access_log-20230122 >> combined.log
#
# Then process the combined log file:
# > ruby apache_access_log_analizer.rb combined.log
# Matches the bracketed Apache timestamp together with the whitespace around it, e.g.
# " [23/Jan/2023:18:49:39 -0800] ". Any numeric UTC offset is accepted so that logs written during
# daylight-saving time (-0700) or on servers in other time zones are parsed as well.
TIMESTAMP = %r{\s+\[[0-9]{2}/[a-zA-Z]{3}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2}\s[+-][0-9]{4}\]\s+}

# Substrings that mark a requested path as suspicious when they appear anywhere in it.
SUSPICIOUS_FRAGMENTS = ['%7', '$', 'wp-', 'graphql', '.env'].freeze

# Decides whether a requested path looks suspicious.
#
# Shibboleth login paths and `/assets/` files ending in a three-letter lowercase extension are never
# flagged. Anything else is flagged when it is longer than 50 characters, contains one of
# SUSPICIOUS_FRAGMENTS, or targets `/api/vN/users/`.
#
# @param request [String] the requested path (including any query string)
# @return [Boolean] true when the path looks suspicious
def is_suspicious(request)
  return false if request.include?('Shibboleth.sso')
  return false if request.start_with?('/assets/') && request =~ /\.[a-z]{3}\z/
  return true if request.length > 50
  return true if SUSPICIOUS_FRAGMENTS.any? { |fragment| request.include?(fragment) }

  !(request =~ %r{/api/v[0-9]/users/}).nil?
end

# Breaks one raw log line into the pieces the report needs.
#
# Expects lines that look like this:
#   172.31.57.192 - - [23/Jan/2023:18:49:39 -0800] "GET /some/path HTTP/1.1" 302 517
#
# @param entry [String] one line of the access log
# @return [Hash, nil] `{ client:, ip:, path:, status: }`, or nil when the line has no recognizable
#   timestamp. `client` is the IP plus any identity/user fields that are not "-", `path` is the
#   second token after the timestamp (nil if absent) and `status` is the second-to-last token
#   ('-' if absent).
def parse_entry(entry)
  client_part, request_part = entry.split(TIMESTAMP, 2)
  return nil if request_part.nil?

  client_tokens = client_part.split(' ')
  request_tokens = request_part.split(' ')
  {
    client: client_tokens.reject { |token| token == '-' }.join(' - '),
    ip: client_tokens.first,
    path: request_tokens[1],
    status: request_tokens.length > 1 ? request_tokens[-2] : '-'
  }
end

# Groups parsed entries by requested path in a single pass.
#
# @param entries [Array<Hash>] hashes as returned by parse_entry
# @return [Array<Hash>] one `{ path:, ips:, status:, count:, suspicious: }` hash per unique path;
#   `status` is the status of the first entry seen for that path
def summarize_requests(entries)
  with_path = entries.reject { |entry| entry[:path].nil? }
  with_path.group_by { |entry| entry[:path] }.map do |path, hits|
    {
      path: path,
      ips: hits.map { |hit| hit[:ip] }.uniq,
      status: hits.first[:status],
      count: hits.length,
      suspicious: is_suspicious(path)
    }
  end
end

# Reads an Apache access log, prints a summary to the console and writes the same analysis to a file.
#
# @param file_name [String] path of the (combined) access log to read
# @param output_name [String] path of the report file to (over)write
# @return [void]
def analyze_log(file_name, output_name = 'log-analysis.log')
  puts "Processing #{file_name} ..."
  # Scrub invalid byte sequences first: regex operations raise on strings that are not valid in
  # their encoding. Blank lines are not log entries, so drop them.
  log = File.readlines(file_name).map(&:scrub).reject { |line| line.strip.empty? }
  first_stamp = log.first.to_s[TIMESTAMP].to_s.strip
  last_stamp = log.last.to_s[TIMESTAMP].to_s.strip

  puts "Initial nbr of log entries: #{log.length}"
  puts " first entry: #{first_stamp} - last entry: #{last_stamp}"
  puts ''
  puts 'Parsing log entries into their important parts ...'
  entries = []
  log.each do |line|
    parsed = parse_entry(line)
    if parsed
      entries << parsed
    else
      # Report and skip the line instead of crashing on it
      puts "SOMETHING WENT WRONG! Cannot parse this entry: #{line}"
    end
  end

  calls_per_client = entries.each_with_object(Hash.new(0)) { |entry, counts| counts[entry[:client]] += 1 }
  client_lines = calls_per_client.map { |client, calls| " #{client} - #{calls} calls made" }
  # HTTP status ascending, then number of calls descending
  stats = summarize_requests(entries).sort_by { |stat| [stat[:status], -stat[:count]] }

  puts ''
  puts '==================='
  puts "#{calls_per_client.length} unique IP addresses"
  puts client_lines
  puts "#{stats.length} unique Paths (e.g. \"/plans\") called"
  puts ''
  puts '==================='
  puts 'Analysis (sorted by HTTP status code ASC and number of calls DESC):'
  puts ''
  stats.each do |request|
    puts ' SUSPICIOUS!' if request[:suspicious]
    puts " #{request[:ips]} called #{request[:count]} time(s) and responded with #{request[:status]}"
    puts " #{request[:path]}"
    puts '------------------------'
  end

  # Block form guarantees the report file is flushed and closed, even if writing fails part-way
  File.open(output_name, 'w') do |file|
    file.write("Initial nbr of log entries: #{log.length}\n")
    file.write(" first entry: #{first_stamp} - last entry: #{last_stamp}\n\n")
    file.write("#{calls_per_client.length} unique IP addresses\n")
    file.write("#{client_lines.join("\n")}\n")
    file.write("\n")
    file.write("#{stats.length} unique Paths (e.g. \"/plans\") called\n\n")
    file.write("===================\n")
    file.write("Analysis (sorted by HTTP status code ASC and number of calls DESC):\n\n")
    stats.each do |request|
      file.write("SUSPICIOUS!\n") if request[:suspicious]
      file.write("#{request[:ips]} called #{request[:count]} time(s) and responded with #{request[:status]}\n")
      file.write(" #{request[:path]}\n")
      file.write("------------------------\n\n")
    end
  end
  puts "Output written to #{output_name}"
end

if ARGV.empty?
  puts 'Please specify a file name! Note that the file MUST reside in this directory'
else
  analyze_log(ARGV[0])
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment