MrMarvin/cloud_front.rb

## cloud_front.rb
require 'json'
require 'ipaddr'

# Additions to the standard library's IPAddr: a CIDR formatter and an
# aws_region attribute that AWSIPRange uses to tag each published range.
class IPAddr
  # AWS region name attached to this address/range; nil until assigned.
  attr_accessor :aws_region

  # Formats the address as "<address>/<prefix length>".
  #
  # The prefix length is obtained by counting the 1 bits of the netmask
  # integer held in @mask_addr.
  #
  # @return [String] e.g. "10.0.0.0/8"
  def to_cidr_string
    prefix_length = @mask_addr.to_s(2).count('1')
    "#{self}/#{prefix_length}"
  end
end

# Maps IP addresses to AWS regions using Amazon's published ip-ranges.json.
#
# The ranges document is kept on disk at AWS_IP_RANGES_PATH and is downloaded
# only when that file does not exist yet. Region lookups are memoized per
# input value, including lookups that match no range.
class AWSIPRange
  AWS_IP_RANGES_PATH = '/tmp/aws-ip-ranges.json'

  # Loads the IPv4 and IPv6 prefix lists, downloading the ranges document
  # first if there is no local copy.
  #
  # @raise [JSON::ParserError] if the local or downloaded document is not valid JSON
  def initialize
    document = load_ranges_document

    @ipv4_ranges = build_ranges(document['prefixes'], 'ip_prefix')
    @ipv6_ranges = build_ranges(document['ipv6_prefixes'], 'ipv6_prefix')

    @cache = {}
  end

  # Finds the first known AWS range that contains +ip+.
  #
  # @param ip [String] textual IPv4 or IPv6 address, as accepted by IPAddr.new
  # @return [IPAddr, nil] the matching range (carrying its aws_region) or nil
  # @raise [IPAddr::InvalidAddressError] if +ip+ is not a valid address
  def check_region(ip)
    address = IPAddr.new(ip)
    candidates = address.ipv4? ? @ipv4_ranges : @ipv6_ranges
    candidates.find { |range| range.include?(address) }
  end

  # Returns the AWS region for +ip+, or 'UNKNOWN' when no range matches (or
  # the matching range has no region).
  #
  # @param ip [String] textual IPv4 or IPv6 address
  # @return [String]
  def get_region(ip)
    # key? instead of ||= so that misses (nil) are memoized as well; with ||=
    # every non-AWS address rescanned the full range list on each call.
    @cache[ip] = check_region(ip) unless @cache.key?(ip)
    @cache[ip]&.aws_region || 'UNKNOWN'
  end

  private

  # Returns the parsed ranges document, preferring the on-disk copy.
  def load_ranges_document
    return JSON.parse(File.read(AWS_IP_RANGES_PATH)) if File.exist?(AWS_IP_RANGES_PATH)

    require 'net/http' # loaded lazily: only needed when there is no cached copy
    puts "downloading AWS ip ranges to #{AWS_IP_RANGES_PATH}..." if ENV['DEBUG']
    body = Net::HTTP.get(URI.parse('https://ip-ranges.amazonaws.com/ip-ranges.json'))

    # Parse before persisting so that a failed download or a non-JSON response
    # never ends up as the cached file.
    document = JSON.parse(body)
    File.write(AWS_IP_RANGES_PATH, body)
    document
  end

  # Turns prefix entries of the ranges document into region-tagged IPAddr ranges.
  def build_ranges(entries, prefix_key)
    entries.map do |entry|
      IPAddr.new(entry[prefix_key]).tap { |range| range.aws_region = entry['region'] }
    end
  end
end

# Shared AWSIPRange instance, built once when this file is loaded (which reads
# the cached ranges file or downloads it). The CloudFront report below calls
# AWSIPCACHE.get_region for the "Most active AWS regions" breakdown.
AWSIPCACHE = AWSIPRange.new

# request-log-analyzer file format for Amazon CloudFront access logs.
#
# Every matched line is flagged as both header and footer. On top of the usual
# frequency, duration and traffic reports, client IPs are grouped by AWS region
# through AWSIPCACHE.
class CloudFront < RequestLogAnalyzer::FileFormat::Base
  extend RequestLogAnalyzer::FileFormat::CommonRegularExpressions

  line_definition :access do |line|
    line.header = true
    line.footer = true

    # The groups are consumed, in order, by the line.capture calls below.
    # Date and time are separated by a tab in the log, hence the explicit "\t"
    # (previously an invisible literal tab character).
    line.regexp = /^(#{timestamp("%Y-%m-%d\t%H:%M:%S")})\s(\w+)\s(\d+)\s(#{ip_address})\s(\w+)\s(\S+)\s(\S+)\s(\d+)\s(\S+)\s(\S+)\s(\S+)\s(\S+)\s(\w+)\s(\S+)\s(\S+)\s(\w+)\s(\d+)\s(\S+)\s(#{ip_address}|-)\s+(\S+)\s(\S+)\s(\w+)\s(\S+)/

    line.capture(:timestamp).as(:timestamp)
    line.capture(:edge_location)
    line.capture(:bytes_sent).as(:traffic, unit: :byte)
    line.capture(:remote_ip)
    line.capture(:http_method)
    line.capture(:cloudfront_distribution)
    line.capture(:path).as(:path)
    line.capture(:http_status).as(:integer)
    line.capture(:referer)
    line.capture(:user_agent)
    line.capture(:query)
    line.capture(:cookie)
    line.capture(:edge_result_type)

    line.capture(:edge_request_id)
    line.capture(:host)
    line.capture(:protocol)
    line.capture(:bytes_received).as(:traffic, unit: :byte)

    # CloudFront writes time-taken in seconds with millisecond precision
    # (e.g. 0.082), so the unit is :sec; :msec scaled every duration down by 1000.
    line.capture(:duration).as(:duration, unit: :sec)
    line.capture(:forwarded_for).as(:nillable_string)
    line.capture(:ssl_protocol)
    line.capture(:ssl_cipher)

    line.capture(:edge_response_result_type)
    line.capture(:protocol_version)
  end

  report do |analyze|
    analyze.timespan
    analyze.hourly_spread

    analyze.frequency category: :http_method, title: 'HTTP methods'
    analyze.frequency category: :http_status, title: 'HTTP statuses'

    analyze.frequency category: :path, title: 'Most popular URIs'

    analyze.frequency category: :remote_ip, title: 'Most active clients'
    # Category computed per request: the AWS region of the client IP, or 'UNKNOWN'.
    analyze.frequency category: ->(request) { AWSIPCACHE.get_region(request[:remote_ip]) },
                      title: 'Most active AWS regions'

    analyze.frequency category: :edge_location, title: 'Most active edge locations'

    analyze.frequency category: :user_agent, title: 'User agents'
    analyze.frequency category: :referer, title: 'Referers'

    analyze.frequency category: :edge_result_type, title: 'Edge result types'

    analyze.duration duration: :duration, category: :path, title: 'Request duration'
    analyze.traffic traffic: :bytes_sent, category: :path, title: 'Traffic out'
    analyze.traffic traffic: :bytes_received, category: :path, title: 'Traffic in'
  end

  # Request subclass with a hand-rolled timestamp conversion.
  class Request < RequestLogAnalyzer::Request
    # Converts "YYYY-MM-DD<sep>HH:MM:SS" into the integer YYYYMMDDHHMMSS by
    # slicing fixed positions; DateTime.parse is avoided to speed up parsing.
    #
    # @param value [String] the captured timestamp text
    # @param _definition [Object] capture definition (unused)
    # @return [Integer]
    def convert_timestamp(value, _definition)
      "#{value[0, 4]}#{value[5, 2]}#{value[8, 2]}#{value[11, 2]}#{value[14, 2]}#{value[17, 2]}".to_i
    end
  end
end
	require 'json'
	require 'ipaddr'

	# NOTE(review): IPAddr is already reopened with these same methods earlier in
	# this file; this second copy looks like a paste/extraction duplicate.
	#
	# Additions to the standard library's IPAddr: a CIDR formatter and an
	# aws_region attribute that AWSIPRange uses to tag each published range.
	class IPAddr
	  # AWS region name attached to this address/range; nil until assigned.
	  attr_accessor :aws_region

	  # Formats the address as "<address>/<prefix length>".
	  #
	  # The prefix length is obtained by counting the 1 bits of the netmask
	  # integer held in @mask_addr.
	  #
	  # @return [String] e.g. "10.0.0.0/8"
	  def to_cidr_string
	    prefix_length = @mask_addr.to_s(2).count('1')
	    "#{self}/#{prefix_length}"
	  end
	end

	# NOTE(review): AWSIPRange is already defined earlier in this file; this
	# second copy looks like a paste/extraction duplicate (its block pipes had
	# been escaped as "\|", which is not valid Ruby).
	#
	# Maps IP addresses to AWS regions using Amazon's published ip-ranges.json.
	#
	# The ranges document is kept on disk at AWS_IP_RANGES_PATH and is downloaded
	# only when that file does not exist yet. Region lookups are memoized per
	# input value, including lookups that match no range.
	class AWSIPRange
	  AWS_IP_RANGES_PATH = '/tmp/aws-ip-ranges.json'

	  # Loads the IPv4 and IPv6 prefix lists, downloading the ranges document
	  # first if there is no local copy.
	  #
	  # @raise [JSON::ParserError] if the local or downloaded document is not valid JSON
	  def initialize
	    document = load_ranges_document

	    @ipv4_ranges = build_ranges(document['prefixes'], 'ip_prefix')
	    @ipv6_ranges = build_ranges(document['ipv6_prefixes'], 'ipv6_prefix')

	    @cache = {}
	  end

	  # Finds the first known AWS range that contains +ip+.
	  #
	  # @param ip [String] textual IPv4 or IPv6 address, as accepted by IPAddr.new
	  # @return [IPAddr, nil] the matching range (carrying its aws_region) or nil
	  # @raise [IPAddr::InvalidAddressError] if +ip+ is not a valid address
	  def check_region(ip)
	    address = IPAddr.new(ip)
	    candidates = address.ipv4? ? @ipv4_ranges : @ipv6_ranges
	    candidates.find { |range| range.include?(address) }
	  end

	  # Returns the AWS region for +ip+, or 'UNKNOWN' when no range matches (or
	  # the matching range has no region).
	  #
	  # @param ip [String] textual IPv4 or IPv6 address
	  # @return [String]
	  def get_region(ip)
	    # key? instead of ||= so that misses (nil) are memoized as well; with ||=
	    # every non-AWS address rescanned the full range list on each call.
	    @cache[ip] = check_region(ip) unless @cache.key?(ip)
	    @cache[ip]&.aws_region || 'UNKNOWN'
	  end

	  private

	  # Returns the parsed ranges document, preferring the on-disk copy.
	  def load_ranges_document
	    return JSON.parse(File.read(AWS_IP_RANGES_PATH)) if File.exist?(AWS_IP_RANGES_PATH)

	    require 'net/http' # loaded lazily: only needed when there is no cached copy
	    puts "downloading AWS ip ranges to #{AWS_IP_RANGES_PATH}..." if ENV['DEBUG']
	    body = Net::HTTP.get(URI.parse('https://ip-ranges.amazonaws.com/ip-ranges.json'))

	    # Parse before persisting so that a failed download or a non-JSON response
	    # never ends up as the cached file.
	    document = JSON.parse(body)
	    File.write(AWS_IP_RANGES_PATH, body)
	    document
	  end

	  # Turns prefix entries of the ranges document into region-tagged IPAddr ranges.
	  def build_ranges(entries, prefix_key)
	    entries.map do |entry|
	      IPAddr.new(entry[prefix_key]).tap { |range| range.aws_region = entry['region'] }
	    end
	  end
	end

	# NOTE(review): AWSIPCACHE is already assigned earlier in this file; this
	# line builds another AWSIPRange and assigns the same constant again.
	AWSIPCACHE = AWSIPRange.new

	# NOTE(review): CloudFront is already defined earlier in this file; this
	# second copy looks like a paste/extraction duplicate (pipes had been escaped
	# as "\|" and the tab in the timestamp format had become a space). Keeping
	# both presumably runs the report block a second time - confirm and consider
	# dropping this copy.
	#
	# request-log-analyzer file format for Amazon CloudFront access logs.
	#
	# Every matched line is flagged as both header and footer. On top of the usual
	# frequency, duration and traffic reports, client IPs are grouped by AWS region
	# through AWSIPCACHE.
	class CloudFront < RequestLogAnalyzer::FileFormat::Base
	  extend RequestLogAnalyzer::FileFormat::CommonRegularExpressions

	  line_definition :access do |line|
	    line.header = true
	    line.footer = true

	    # The groups are consumed, in order, by the line.capture calls below.
	    # Date and time are separated by a tab in the log, hence the explicit "\t".
	    line.regexp = /^(#{timestamp("%Y-%m-%d\t%H:%M:%S")})\s(\w+)\s(\d+)\s(#{ip_address})\s(\w+)\s(\S+)\s(\S+)\s(\d+)\s(\S+)\s(\S+)\s(\S+)\s(\S+)\s(\w+)\s(\S+)\s(\S+)\s(\w+)\s(\d+)\s(\S+)\s(#{ip_address}|-)\s+(\S+)\s(\S+)\s(\w+)\s(\S+)/

	    line.capture(:timestamp).as(:timestamp)
	    line.capture(:edge_location)
	    line.capture(:bytes_sent).as(:traffic, unit: :byte)
	    line.capture(:remote_ip)
	    line.capture(:http_method)
	    line.capture(:cloudfront_distribution)
	    line.capture(:path).as(:path)
	    line.capture(:http_status).as(:integer)
	    line.capture(:referer)
	    line.capture(:user_agent)
	    line.capture(:query)
	    line.capture(:cookie)
	    line.capture(:edge_result_type)

	    line.capture(:edge_request_id)
	    line.capture(:host)
	    line.capture(:protocol)
	    line.capture(:bytes_received).as(:traffic, unit: :byte)

	    # CloudFront writes time-taken in seconds with millisecond precision
	    # (e.g. 0.082), so the unit is :sec; :msec scaled every duration down by 1000.
	    line.capture(:duration).as(:duration, unit: :sec)
	    line.capture(:forwarded_for).as(:nillable_string)
	    line.capture(:ssl_protocol)
	    line.capture(:ssl_cipher)

	    line.capture(:edge_response_result_type)
	    line.capture(:protocol_version)
	  end

	  report do |analyze|
	    analyze.timespan
	    analyze.hourly_spread

	    analyze.frequency category: :http_method, title: 'HTTP methods'
	    analyze.frequency category: :http_status, title: 'HTTP statuses'

	    analyze.frequency category: :path, title: 'Most popular URIs'

	    analyze.frequency category: :remote_ip, title: 'Most active clients'
	    # Category computed per request: the AWS region of the client IP, or 'UNKNOWN'.
	    analyze.frequency category: ->(request) { AWSIPCACHE.get_region(request[:remote_ip]) },
	                      title: 'Most active AWS regions'

	    analyze.frequency category: :edge_location, title: 'Most active edge locations'

	    analyze.frequency category: :user_agent, title: 'User agents'
	    analyze.frequency category: :referer, title: 'Referers'

	    analyze.frequency category: :edge_result_type, title: 'Edge result types'

	    analyze.duration duration: :duration, category: :path, title: 'Request duration'
	    analyze.traffic traffic: :bytes_sent, category: :path, title: 'Traffic out'
	    analyze.traffic traffic: :bytes_received, category: :path, title: 'Traffic in'
	  end

	  # Request subclass with a hand-rolled timestamp conversion.
	  class Request < RequestLogAnalyzer::Request
	    # Converts "YYYY-MM-DD<sep>HH:MM:SS" into the integer YYYYMMDDHHMMSS by
	    # slicing fixed positions; DateTime.parse is avoided to speed up parsing.
	    #
	    # @param value [String] the captured timestamp text
	    # @param _definition [Object] capture definition (unused)
	    # @return [Integer]
	    def convert_timestamp(value, _definition)
	      "#{value[0, 4]}#{value[5, 2]}#{value[8, 2]}#{value[11, 2]}#{value[14, 2]}#{value[17, 2]}".to_i
	    end
	  end
	end