Created
August 1, 2012 13:44
-
-
Save wildlyinaccurate/3226944 to your computer and use it in GitHub Desktop.
Log replayer that flags URLs where the X-Check-Cacheable header returned by Akamai is NO
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=begin
This script replays an Apache log file and checks whether each request
is cacheable by Akamai.

To run the script, pass the log file and the URL of your Akamai origin host
as arguments, for example:

    replayer.rb /var/log/apache/access.log http://dsd-www.example.com.edgesuite-staging.net
=end
require 'net/http' | |
# Replays the requests recorded in an access log against a host and collects
# the paths whose response carries `X-Check-Cacheable: NO` with status 200.
#
# Each log line is split on whitespace. The first field of the first
# non-blank line is read as "host[:port]" and supplies the Host header and
# the port for the single connection that is reused for every request. The
# field at index PATH_FIELD of every line is used as the request path.
# NOTE(review): this field layout presumably matches a custom Apache
# LogFormat -- confirm against the server configuration.
class AkamaiCacheLogParser
  USAGE = "Usage: #{__FILE__} <INPUT_FILE> <HOST>"
  PRAGMA_HEADER = 'akamai-x-cache-on, akamai-x-check-cacheable'

  # Zero-based index of the whitespace-separated field holding the path.
  PATH_FIELD = 10
  # A progress line is printed every time this many requests have been made.
  PROGRESS_INTERVAL = 1000

  # Reads the whole log into memory and resolves the host to connect to.
  # Aborts the process with a usage message when the log file does not exist
  # or no host was given.
  #
  # @param path [String] path of the log file to replay
  # @param host [String, nil] URL of the host to connect to; a value that
  #   URI parses without a host component is used verbatim as the host name
  def initialize(path = '', host = nil)
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    abort("Unable to read input file. #{USAGE}") unless File.exist?(path)
    abort("No host specified. #{USAGE}") if host.nil?

    @requests_processed = 0
    @not_cacheable = []
    # File.read closes the file handle, unlike File.open(path).read.
    @log = File.read(path)
    @log_lines = @log.split("\n")
    @host = URI(host).host || host
  end

  # Sends one GET request per usable log line over a single connection and
  # records the paths reported as not cacheable. Lines without a field at
  # PATH_FIELD (blank or truncated lines) are skipped without a request.
  # Returns immediately, without connecting, when the log has no non-blank
  # line.
  def run
    # Maybe a big assumption, but the host and port are taken from the
    # first non-blank line so that only one connection is needed.
    first_line = @log_lines.find { |line| !line.strip.empty? }
    return if first_line.nil?

    host_header, port = first_line.split(' ').first.split(':')
    # A missing or non-numeric port becomes nil, i.e. Net::HTTP's default.
    port = (port.to_s =~ /\A\d+\z/) ? port.to_i : nil

    Net::HTTP.start(@host, port) do |http|
      @log_lines.each do |line|
        path = line.split(' ')[PATH_FIELD]
        next if path.nil?

        response = fetch(http, path, host_header)
        next if response.nil?

        uncacheable = response['x-check-cacheable'] == 'NO' && response.code == '200'
        @not_cacheable.push(path) if uncacheable
      end
    end
  end

  # Builds the summary: totals first, then every uncacheable path prefixed
  # with the number of times it was seen, most frequent first.
  #
  # @return [String] the formatted report
  def results
    counts = Hash.new(0)
    @not_cacheable.each { |path| counts[path] += 1 }
    ranked = counts.sort_by { |_path, count| -count }

    header = "==========\n" \
             "Results:\n" \
             "Processed #{@requests_processed} requests and found " \
             "#{@not_cacheable.length} uncacheable paths.\n\n"
    header + ranked.map { |path, count| "[#{count}] #{path}\n" }.join
  end

  private

  # Issues a single GET for +path+ with the Akamai debug Pragma header and
  # the Host header taken from the log. Every attempt is counted, whether it
  # succeeds or not.
  #
  # @return [Net::HTTPResponse, nil] the response, or nil when the request
  #   failed with a StandardError (the failure is reported on stderr)
  def fetch(http, path, host_header)
    request = Net::HTTP::Get.new(path)
    request['Pragma'] = PRAGMA_HEADER
    request['Host'] = host_header
    http.request(request)
  rescue Interrupt
    puts "Aborted."
    exit
  rescue StandardError => e
    # Best effort: one failing request must not stop the replay, but it
    # should not vanish silently either.
    warn "Request for #{path} failed: #{e.class}: #{e.message}"
    nil
  ensure
    @requests_processed += 1
    report_progress
  end

  # Prints a progress line every PROGRESS_INTERVAL requests.
  def report_progress
    return unless (@requests_processed % PROGRESS_INTERVAL).zero?

    puts "Processed #{@requests_processed} requests."
    $stdout.flush
  end
end
# Command-line entry point: replay the log named by the first argument
# against the host named by the second, then print the report. The guard
# keeps the replay from starting when this file is only loaded by another
# script.
if __FILE__ == $PROGRAM_NAME
  # The constructor takes at most two arguments; show the usage message
  # instead of an ArgumentError when more are supplied.
  abort(AkamaiCacheLogParser::USAGE) if ARGV.length > 2

  parser = AkamaiCacheLogParser.new(*ARGV)
  begin
    puts 'Running...'
    parser.run
  ensure
    # Printed even when the run is interrupted or fails part-way, so the
    # paths collected so far are not lost.
    puts parser.results
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment