Skip to content

Instantly share code, notes, and snippets.

@tessi
Created July 25, 2013 14:10
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tessi/6080040 to your computer and use it in GitHub Desktop.
Save tessi/6080040 to your computer and use it in GitHub Desktop.
Read a WorldCup98 access log file in Ruby. See: http://ita.ee.lbl.gov/html/contrib/WorldCup.html
# Lookup tables used by Entry.parse to turn the small integer codes stored in
# a binary log record into symbols / numbers.
#
# They are intentionally assigned at top level: a constant assigned inside a
# `Struct.new do ... end` block is bound to the enclosing lexical scope, not to
# the struct, so these names have always been top-level constants. Keeping them
# here makes that explicit and keeps existing references working. They are
# frozen so a stray mutation cannot silently corrupt later decoding.
REGIONS = %i[SantaClara Plano Herndon Paris].freeze
PROTOCOLS = %i[HTTP_09 HTTP_10 HTTP_11 HTTP_XX].freeze
METHODS = %i[get head post put delete trace options connect other].freeze
STATUS_CODES = [
  100, 101,
  200, 201, 202, 203, 204, 205, 206,
  300, 301, 302, 303, 304, 305,
  400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415,
  500, 501, 502, 503, 504, 505,
  :other
].freeze
FILE_TYPES = %i[html image audio video java formatted dynamic text compressed programs directory icl other].freeze

# One decoded request record of a binary access log.
#
# Members:
#   timestamp - Time built with Time.at from the first 32-bit field
#   clientID  - Integer, second 32-bit field (stored as-is)
#   objectID  - Integer, third 32-bit field (stored as-is)
#   size      - Integer, fourth 32-bit field (stored as-is)
#   method    - entry of METHODS selected by the first byte field
#   status    - Hash with :protocol (from PROTOCOLS) and :status_code (from STATUS_CODES)
#   type      - entry of FILE_TYPES selected by the third byte field
#   server    - Hash with :region (from REGIONS) and :server_number (Integer 0..31)
#
# NOTE: the :method member shadows Object#method on Entry instances; read it
# with entry[:method] (or entry.method without arguments).
Entry = Struct.new(:timestamp, :clientID, :objectID, :size, :method, :status, :type, :server) do
  # Number of bytes consumed per record: four 32-bit fields plus four 8-bit
  # fields. Set with const_set so it lives in Entry rather than at top level.
  const_set(:RECORD_SIZE, 20)

  # Decodes one binary record into an Entry.
  #
  # bytes - String holding at least Entry::RECORD_SIZE bytes; only the first
  #         RECORD_SIZE bytes are read, any extra bytes are ignored.
  #
  # Returns a populated Entry. A code that falls outside its lookup table is
  # decoded as nil for that value.
  # Raises ArgumentError when bytes is nil or shorter than one record (for
  # example the trailing partial read of a truncated file).
  def self.parse(bytes)
    if bytes.nil? || bytes.bytesize < self::RECORD_SIZE
      raise ArgumentError,
            "record must be at least #{self::RECORD_SIZE} bytes, got #{bytes.nil? ? 'nil' : bytes.bytesize}"
    end

    # N = 32-bit unsigned integer in network (big-endian) byte order,
    # C = 8-bit unsigned integer.
    seconds, client, object, length, method_code, status_byte, type_code, server_byte =
      bytes.unpack('NNNNCCCC')

    entry = new
    entry[:timestamp] = Time.at(seconds)
    entry[:clientID] = client
    entry[:objectID] = object
    entry[:size] = length
    entry[:method] = METHODS[method_code]
    # Status byte: the top 2 bits select the protocol, the low 6 bits index STATUS_CODES.
    entry[:status] = {
      :protocol => PROTOCOLS[status_byte >> 6],
      :status_code => STATUS_CODES[status_byte & 0x3F]
    }
    entry[:type] = FILE_TYPES[type_code]
    # Server byte: the top 3 bits select the region, the low 5 bits are the server number.
    entry[:server] = {
      :region => REGIONS[server_byte >> 5],
      :server_number => (server_byte & 0x1F)
    }
    entry
  end
end
# Example usage: decode every record of one log file into an in-memory array.
# The file is opened in binary mode and consumed in 20-byte chunks, each of
# which is handed to Entry.parse.
entries = []
File.open('/path/to/file/../for/example/wc_day61_1', 'rb') do |log|
  loop do
    record = log.read(20)
    # IO#read(length) returns nil once the end of the file has been reached.
    break if record.nil?

    entries.push(Entry.parse(record))
  end
end
entries.first
# Sample result (the UTC offset shown for the timestamp follows the local
# time zone of the machine running the script):
# => #<struct Entry
# timestamp=1998-06-25 00:00:01 +0200,
# clientID=6400,
# objectID=24643,
# size=872,
# method=:get,
# status={:protocol=>:HTTP_10, :status_code=>200},
# type=:image,
# server={:region=>:SantaClara, :server_number=>3}>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment