Skip to content

Instantly share code, notes, and snippets.

@jfqd
Created January 31, 2011 08:22
Show Gist options
  • Select an option

  • Save jfqd/803770 to your computer and use it in GitHub Desktop.

Select an option

Save jfqd/803770 to your computer and use it in GitHub Desktop.
import apache access.log files into a mysql table
#!/usr/bin/ruby
require "rubygems"
require "apachelogregex"
require "resolv.rb"
require "mysql"
require 'parsedate'
######################################
# Database parameters
HOST = "<%= apachelog_host %>"
UID = "<%= apachelog_uid %>"
PWD = "<%= apachelog_pwd %>"
DBASE = "log"
TABLE = "apache"
######################################
# Other parameters
SKIP_LOCALHOST = true
######################################
# Apache Log format vhost
format = '%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" - %{Host}i'
######################################
# Class extensions
class String
# cleanup string for sql query
def cleanup
self.gsub(/[\[\]\"'\\]/, '')
end
# resolve hostname of a given ip
def hostname
Resolv.getname(self).to_s rescue ""
end
# convert apache log time format "29/May/2009:03:00:37 +0200"
# to sql time "2007-11-18 15:58:36"
def sql_date
self[11]= " "
pd = ParseDate.parsedate(self)
"#{pd[0]}-#{pd[1].two_digits}-#{pd[2].two_digits} #{pd[3].two_digits}:#{pd[4].two_digits}:#{pd[5].two_digits}"
end
def ipnum
a = self.split(".")
16777216*a[0].to_i + 65536*a[1].to_i + 256*a[2].to_i + a[3].to_i
end
end
class Fixnum
def two_digits
("0" + self.to_s)[-2..-1]
end
end
######################################
# Script method
def separate(logline)
ip = logline["%{X-Forwarded-For}i"]
return if (ip == "127.0.0.1" || ip == "-" || ip == "::1") && @skip_localhost
host = @previp == ip ? @prevhn : ip.hostname
@previp = ip
@prevhn = host
identd = logline["%l"]
userid = logline["%u"]
rtime = logline["%t"].cleanup
stime = rtime.sql_date
request = logline["%r"].cleanup
state = logline["%>s"]
bytes = logline["%b"]
referer = logline["%{Referer}i"].cleanup
agent = logline["%{User-Agent}i"].cleanup
vhost = logline["%{Host}i"].cleanup
country = country_query(ip)
country_code = country[0]
country_name = country[1]
# create and execute query
dbquery ="INSERT INTO #{@table} (ip, host, identd, userid, time, request, state, bytes, referer, agent, vhost, country_code, country_name, created_at) VALUES ('#{ip}', '#{host}', '#{identd}', '#{userid}', '#{stime}', '#{request}', '#{state}', '#{bytes}', '#{referer}', '#{agent}', '#{vhost}', '#{country_code}', '#{country_name}', '#{@log_started}')"
@dbh.query(dbquery)
# puts dbquery
end
def country_query(ip)
begin
result = Array.new
raise if (ip == "127.0.0.1" || ip == "-" || ip == "::1")
num = ip.ipnum
dbquery = "SELECT country, name FROM geoip WHERE #{num.to_s} >= begin_num AND #{num.to_s} <= end_num LIMIT 1"
res = @dbh.query(dbquery)
res.each do |row|
result = [ row[0], row[1] ]
end
rescue
result = [ "", "" ]
end
return result
end
def log_date(date=Time.now)
date.utc.strftime("%Y-%m-%d %H:%M:%S")
end
######################################
# Parameters setup
logfile= ARGV.shift
parser = ApacheLogRegex.new(format)
counter = 0
@previp = ""
@prevhn = ""
@table = TABLE
@skip_localhost = SKIP_LOCALHOST
@log_started = log_date.to_s
######################################
# main entry
begin
# verify logfile
# connect to the MySQL server
puts "#{log_date}: Opening mysql connection..."
@dbh = Mysql.real_connect(HOST, UID, PWD, DBASE)
# read logfile
File.readlines(logfile).collect do |line|
begin
counter += 1
l = parser.parse(line)
separate(l) if l
rescue ApacheLogRegex::ParseError => e
nil
end
end
puts "#{log_date}: Parsed #{counter.to_s} lines..."
rescue Mysql::Error => e
puts "Error code: #{e.errno}"
puts "Error message: #{e.error}"
puts "Error SQLSTATE: #{e.sqlstate}" if e.respond_to?("sqlstate")
ensure
# disconnect from server
@dbh.close if @dbh
puts "#{log_date}: Closing mysql connection..."
puts "---------------------------"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment