Last active
September 6, 2017 05:44
-
-
Save MrMarvin/9e96c8c14870d028038f5fcc9944b36b to your computer and use it in GitHub Desktop.
request log analyzer CloudFront with AWS region
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'json' | |
require 'ipaddr' | |
# Extensions to the standard library IPAddr used by the AWS range lookup.
class IPAddr
  # AWS region name attached to this address/range; nil until assigned.
  attr_accessor :aws_region

  # Renders the address in CIDR notation, e.g. "10.0.0.0/8".
  #
  # The prefix length is derived by counting the set bits of the netmask
  # that IPAddr keeps internally in @mask_addr.
  #
  # @return [String] "<address>/<prefix length>"
  def to_cidr_string
    prefix_length = @mask_addr.to_s(2).count('1')
    "#{self}/#{prefix_length}"
  end
end
# Maps IP addresses to AWS regions using Amazon's published ip-ranges.json.
#
# The JSON document is kept on disk and only downloaded when the file does
# not exist yet. Lookups are memoized per input string.
class AWSIPRange
  AWS_IP_RANGES_PATH = '/tmp/aws-ip-ranges.json'
  AWS_IP_RANGES_URL = 'https://ip-ranges.amazonaws.com/ip-ranges.json'

  # @param path [String] location of the local copy of ip-ranges.json;
  #   it is downloaded to this path first when the file is missing.
  def initialize(path = AWS_IP_RANGES_PATH)
    data = load_ranges(path)
    @ipv4_ranges = build_ranges(data['prefixes'], 'ip_prefix')
    @ipv6_ranges = build_ranges(data['ipv6_prefixes'], 'ipv6_prefix')
    @cache = {}
  end

  # Finds the first published range that contains the given address.
  #
  # @param ip [String] IPv4 or IPv6 address
  # @return [IPAddr, nil] the matching range (carrying its aws_region),
  #   or nil when no range contains the address
  def check_region(ip)
    ip = IPAddr.new(ip)
    candidates = ip.ipv4? ? @ipv4_ranges : @ipv6_ranges
    candidates.find { |range| range.include?(ip) }
  end

  # Returns the AWS region name for an address, or 'UNKNOWN'.
  #
  # @param ip [String] IPv4 or IPv6 address
  # @return [String]
  def get_region(ip)
    # key? rather than ||= so that misses (nil) are memoized as well;
    # otherwise every address outside AWS is re-scanned on each call.
    @cache[ip] = check_region(ip) unless @cache.key?(ip)
    @cache[ip]&.aws_region || 'UNKNOWN'
  end

  private

  # Reads the parsed ip-ranges document from +path+, downloading it first
  # when the file is absent.
  def load_ranges(path)
    return JSON.parse(File.read(path)) if File.exist?(path)

    require 'net/http' # only needed on the rare download path
    puts "downloading AWS ip ranges to #{path}..." if ENV['DEBUG']
    body = Net::HTTP.get(URI.parse(AWS_IP_RANGES_URL))
    # Parse before writing: a failed download or a non-JSON response raises
    # here and never leaves a broken cache file behind.
    data = JSON.parse(body)
    File.write(path, body)
    data
  end

  # Turns a list of prefix hashes into IPAddr ranges tagged with their region.
  def build_ranges(prefixes, key)
    prefixes.map do |prefix|
      IPAddr.new(prefix[key]).tap { |range| range.aws_region = prefix['region'] }
    end
  end
end
# Single shared AWSIPRange instance, built when this file is loaded and used
# by the 'Most active AWS regions' report below. Constructing it reads the
# local ip-ranges file and downloads it first if the file does not exist.
AWSIPCACHE = AWSIPRange.new | |
# request-log-analyzer file format for CloudFront access logs, with an extra
# report that groups requests by the AWS region of the client IP.
class CloudFront < RequestLogAnalyzer::FileFormat::Base
  extend RequestLogAnalyzer::FileFormat::CommonRegularExpressions

  # One log line is one complete request, hence header and footer are both set.
  # The captures below are listed in the same order as the regexp groups.
  line_definition :access do |line|
    line.header = true
    line.footer = true
    line.regexp = /^(#{timestamp('%Y-%m-%d %H:%M:%S')})\s(\w+)\s(\d+)\s(#{ip_address})\s(\w+)\s(\S+)\s(\S+)\s(\d+)\s(\S+)\s(\S+)\s(\S+)\s(\S+)\s(\w+)\s(\S+)\s(\S+)\s(\w+)\s(\d+)\s(\S+)\s(#{ip_address}|-)\s+(\S+)\s(\S+)\s(\w+)\s(\S+)/

    line.capture(:timestamp).as(:timestamp)
    line.capture(:edge_location)
    line.capture(:bytes_sent).as(:traffic, unit: :byte)
    line.capture(:remote_ip)
    line.capture(:http_method)
    line.capture(:cloudfront_distribution)
    line.capture(:path).as(:path)
    line.capture(:http_status).as(:integer)
    line.capture(:referer)
    line.capture(:user_agent)
    line.capture(:query)
    line.capture(:cookie)
    line.capture(:edge_result_type)
    line.capture(:edge_request_id)
    line.capture(:host)
    line.capture(:protocol)
    line.capture(:bytes_received).as(:traffic, unit: :byte)
    # CloudFront logs time-taken in seconds (e.g. 0.082), so the unit must be
    # :sec; declaring :msec would scale every duration down by a factor of 1000.
    line.capture(:duration).as(:duration, unit: :sec)
    line.capture(:forwarded_for).as(:nillable_string)
    line.capture(:ssl_protocol)
    line.capture(:ssl_cipher)
    line.capture(:edge_response_result_type)
    line.capture(:protocol_version)
  end

  report do |analyze|
    analyze.timespan
    analyze.hourly_spread

    analyze.frequency category: :http_method, title: 'HTTP methods'
    analyze.frequency category: :http_status, title: 'HTTP statuses'
    analyze.frequency category: :path, title: 'Most popular URIs'
    analyze.frequency category: :remote_ip, title: 'Most active clients'
    # Category computed per request: region of the client IP, or 'UNKNOWN'.
    analyze.frequency category: lambda { |r| AWSIPCACHE.get_region(r[:remote_ip]) }, title: 'Most active AWS regions'
    analyze.frequency category: :edge_location, title: 'Most active edge locations'
    analyze.frequency category: :user_agent, title: 'User agents'
    analyze.frequency category: :referer, title: 'Referers'
    analyze.frequency category: :edge_result_type, title: 'Edge result types'

    analyze.duration duration: :duration, category: :path, title: 'Request duration'
    analyze.traffic traffic: :bytes_sent, category: :path, title: 'Traffic out'
    analyze.traffic traffic: :bytes_received, category: :path, title: 'Traffic in'
  end

  # Request subclass with a faster timestamp conversion.
  class Request < RequestLogAnalyzer::Request
    # Do not use DateTime.parse, but slice the fixed-width timestamp ourselves
    # and return an integer to speed up parsing.
    #
    # The digits are taken from fixed offsets (year 0-3, month 5-6, day 8-9,
    # hour 11-12, minute 14-15, second 17-18) and concatenated into a
    # YYYYMMDDHHMMSS integer.
    def convert_timestamp(value, _definition)
      "#{value[0, 4]}#{value[5, 2]}#{value[8, 2]}#{value[11, 2]}#{value[14, 2]}#{value[17, 2]}".to_i
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment