Skip to content

Instantly share code, notes, and snippets.

@jibing57
Last active May 26, 2020 18:27
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jibing57/ea180bfc3f7cb96e4a1fa67aa7a7c0c2 to your computer and use it in GitHub Desktop.
Save jibing57/ea180bfc3f7cb96e4a1fa67aa7a7c0c2 to your computer and use it in GitHub Desktop.
A simple Ruby script that parses AWS ELB access logs into a CSV file.
require 'csv'
require 'optparse'
# Parses AWS Classic ELB access-log entries and converts whole log files to CSV.
#
# Every entry is matched against LINE_REGEX; the named groups of that regex
# are the CSV columns, in the order they are declared.
class AWSELBAccessLogParser
  # Field layout of one access-log entry. Kept for reference only; parsing is
  # driven entirely by LINE_REGEX.
  ELB_ACCESS_LOG_FORMAT = %Q(timestamp elb client:port backend:port request_processing_time backend_processing_time response_processing_time elb_status_code backend_status_code received_bytes sent_bytes "request" "user_agent" ssl_cipher ssl_protocol).freeze

  # One access-log entry. Defined once as a constant instead of being
  # re-assigned to a class variable on every instantiation.
  LINE_REGEX = /
    (?<timestamp>[^ ]*)                                        # timestamp
    \s+(?<elb>[^ ]*)                                           # elb
    \s+(?<client>[^ ]*):(?<client_port>[0-9]*)                 # client:port
    \s+(?<backend>[^ ]*?)(?::(?<backend_port>[0-9]*))?         # backend:port, or a bare - when no backend was reached
    \s+(?<request_processing_time>[-.0-9]*)                    # request_processing_time value: 0.000056 or -1
    \s+(?<backend_processing_time>[-.0-9]*)                    # backend_processing_time value: 0.093779 or -1
    \s+(?<response_processing_time>[-.0-9]*)                   # response_processing_time value: 0.000049 or -1
    \s+(?<elb_status_code>-|[0-9]*)                            # elb_status_code
    \s+(?<backend_status_code>-|[0-9]*)                        # backend_status_code
    \s+(?<received_bytes>[-0-9]*)                              # received_bytes
    \s+(?<sent_bytes>[-0-9]*)                                  # sent_bytes
    \s+\"(?<request>[^ ]*\s+[^ ]*\s+[^ ]*\s*)\"                # entire request; TCP listeners log "- - - " with a trailing space
    \s+\"(?<user_agent>[^ ]*.*[^ ]*)\"                         # entire user_agent
    \s+(?<ssl_cipher>[^ ]*)                                    # ssl_cipher
    \s+(?<ssl_protocol>[^ ]*)                                  # ssl_protocol
  /x

  # Matches a single log entry.
  #
  # The line terminator is stripped first: the trailing `[^ ]*` of the regex
  # would otherwise capture the newline into ssl_protocol.
  #
  # @param line [String, nil] one raw log entry
  # @return [MatchData, nil] named captures, or nil when the line is nil or
  #   does not look like an access-log entry
  def parse_line(line)
    return nil if line.nil?

    LINE_REGEX.match(line.chomp)
  end

  # Converts an access-log file into a CSV file with a header row.
  #
  # The header comes from the regex's group names, so it is written even when
  # the source file is empty or its first line is malformed. Lines that do not
  # parse are reported on stdout and skipped.
  #
  # @param src_file [String] path of the access log to read
  # @param dst_file [String] path of the CSV file to (over)write
  # @return [Boolean] false when the arguments or paths are unusable,
  #   true once the CSV has been written
  def parse_log_to_csv(src_file, dst_file)
    if src_file.nil? || dst_file.nil?
      puts "please entry the right src_file and dst_file"
      return false
    end
    unless File.readable?(src_file)
      puts "src_file[#{src_file}] is not readable"
      return false
    end
    unless File.writable?(File.dirname(dst_file))
      puts "dst_file[#{dst_file}] is not writable"
      return false
    end

    field_names = LINE_REGEX.names
    CSV.open(dst_file, "w") do |csv|
      csv << field_names
      # File.foreach closes the source handle when iteration finishes.
      File.foreach(src_file) do |line|
        parts = parse_line(line)
        if parts.nil?
          puts "Error -- Can't parse line [#{line}]"
          next
        end
        csv << field_names.map { |field_name| parts[field_name] }
      end
    end
    true
  end
end
# Command-line entry point: declare the options, parse ARGV, check that both
# mandatory options were given, then run the conversion.
options = {}

opts = OptionParser.new
opts.banner = "Usage: #{$PROGRAM_NAME} [options] ..."
opts.separator ''
opts.separator 'Options:'
opts.on('-s src_access_log',
        '--src_access_log src_access_log',
        String,
        'Set source access log file') { |key| options[:src_access_log] = key }
opts.on('-d dest_csv_file',
        '--dest_csv_file dest_csv_file',
        String,
        'set output csv file name') { |key| options[:dest_csv_file] = key }
opts.on('-h', '--help', 'Show this message') do
  puts opts
  exit
end

begin
  opts.parse!
rescue OptionParser::ParseError => e
  # ParseError is the common superclass, so every malformed command line
  # (not only InvalidOption / MissingArgument) ends with the usage text
  # instead of a backtrace.
  puts e.message
  puts opts
  exit(-1)
end

# Enforce the presence of the mandatory options.
mandatory = %i[src_access_log dest_csv_file]
missing = mandatory.select { |param| options[param].nil? }
unless missing.empty?
  puts "Missing options: #{missing.join(', ')}"
  puts opts
  exit(-1)
end

# parse_log_to_csv returns false when it refuses to run; surface that as a
# non-zero exit status so calling scripts can detect the failure.
converted = AWSELBAccessLogParser.new.parse_log_to_csv(options[:src_access_log], options[:dest_csv_file])
exit(-1) unless converted
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment