Skip to content

Instantly share code, notes, and snippets.

@mechamogera
Created July 16, 2013 05:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mechamogera/6006110 to your computer and use it in GitHub Desktop.
Save mechamogera/6006110 to your computer and use it in GitHub Desktop.
require 'rubygems'
gem 'aws-sdk'
require 'aws'
require 'optparse'
require 'ostruct'
require 'time'
require 'yaml'
require 'csv'
# Command-line options. A value given here always wins over the same setting
# in the config file (see merge_config_defaults below).
options = OpenStruct.new
options.config_file = "./get_s3_access_log.yml"
opt = OptionParser.new
opt.on('-a', '--access-key-id=VAL') { |v| options.access_key_id = v }
opt.on('-s', '--secret-access-key=VAL') { |v| options.secret_access_key = v }
opt.on('-e', '--endpoint=VAL') { |v| options.endpoint = v }
opt.on('-b', '--bucket=VAL') { |v| options.bucket = v }
opt.on('-p', '--prefix=VAL') { |v| options.prefix = v }
opt.on('-f', '--first-time=VAL') { |v| options.first_time = Time.parse(v) }
opt.on('-l', '--last-time=VAL') { |v| options.last_time = Time.parse(v) }
opt.on('-o', '--output-format=VAL') { |v| options.output_format = v.to_sym }
opt.on('-c', '--config-file=VAL', "Default:#{options.config_file}") { |v| options.config_file = v }
opt.on('-t', '--target-config=VAL') { |v| options.target_config = v }
opt.parse!(ARGV)

# Fills every option that is still unset with the value from one section of
# the config file. Options that already hold a value are left untouched.
#
# @param options [OpenStruct] options parsed so far; modified in place
# @param section [Hash, nil] option name => fallback value; anything that is
#   not a Hash is ignored
# @return [OpenStruct] the same +options+ object
def merge_config_defaults(options, section)
  return options unless section.is_a?(Hash)

  section.each do |key, value|
    name = key.to_s
    # OpenStruct#[] reads the attribute itself; `send` would dispatch to any
    # (even private Kernel) method that happens to share the key's name.
    options[name] ||= value
  end
  options
end

if File.exist?(options.config_file)
  data = YAML.load_file(options.config_file)
  # The file is expected to map a target name to a hash of settings; an empty
  # file or a missing target must not abort the script.
  section = data.is_a?(Hash) ? data[options.target_config] : nil
  unless section.is_a?(Hash)
    warn "#{options.config_file}: no settings found for target #{options.target_config.inspect}"
  end
  merge_config_defaults(options, section)
end
# Assemble the S3 client settings from the resolved options. The proxy comes
# from the environment, with the upper-case variable taking precedence.
proxy = ENV['HTTP_PROXY'] || ENV['http_proxy']
s3_settings = {
  :access_key_id => options.access_key_id,
  :secret_access_key => options.secret_access_key,
  :s3_endpoint => options.endpoint,
  :proxy_uri => proxy
}
s3 = AWS::S3.new(s3_settings)

# Handle on the bucket named by the options.
bucket = s3.buckets[options.bucket]
# Select the objects whose key carries a timestamp inside the requested window
# [first_time, last_time). The key is expected to look like
# "<prefix>YYYY-MM-DD-hh-mm-ss...", with the timestamp read as UTC.
#
# Built once, outside the loop; a missing prefix is treated as the empty string.
key_time_pattern = /\A#{Regexp.escape(options.prefix.to_s)}(\d{4}(?:-\d{2}){5})/
objects = []
bucket.objects.with_prefix(options.prefix).each do |object|
  stamp = object.key[key_time_pattern, 1]
  # Keys under the prefix that carry no timestamp are skipped rather than
  # crashing the whole run.
  next unless stamp

  time = Time.utc(*stamp.split("-").map { |part| part.to_i })
  # Either bound may be omitted, which leaves that side of the window open.
  next if options.first_time && time < options.first_time
  next if options.last_time && time >= options.last_time

  objects << object
end
# Print every selected log object, either verbatim or as LTSV
# (tab-separated "label:value" pairs, one line per log record).
#
# The format may arrive as a Symbol (command line) or as a String (config
# file), so it is compared by its string form.
ltsv_output = options.output_format.to_s == "ltsv"

objects.each do |object|
  unless ltsv_output
    object.read { |chunk| puts chunk }
    next
  end

  # Collect the whole body before quoting: the object is streamed in chunks,
  # and a "[dd/Mon/yyyy:hh:mm:ss +zzzz]" timestamp cut by a chunk boundary
  # would otherwise stay unquoted and shift every following column.
  chunks = []
  object.read { |chunk| chunks << chunk }
  body = chunks.join.gsub(/\[\d+\/[A-Za-z]+\/(?:\d+:?){5}\s\+\d{4}\]/) { |stamp| %Q!"#{stamp}"! }

  # The header row supplies the labels for the space-separated log fields.
  log = "bucket_owner bucket time remote_ip requester request_id operation key request_uri http_status error_code bytes_sent object_size total_time turn_around_time referrer user_agent version_id\n" + body

  CSV.parse(log, :headers => true,
                 :header_converters => :symbol,
                 :col_sep => " ").each do |row|
    puts row.to_hash.map { |pair| pair.join(":") }.join("\t")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment