Created
January 31, 2011 08:22
-
-
Save jfqd/803770 to your computer and use it in GitHub Desktop.
import apache access.log files into a mysql table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/ruby | |
| require "rubygems" | |
| require "apachelogregex" | |
| require "resolv.rb" | |
| require "mysql" | |
| require 'parsedate' | |
| ###################################### | |
| # Database parameters | |
| HOST = "<%= apachelog_host %>" | |
| UID = "<%= apachelog_uid %>" | |
| PWD = "<%= apachelog_pwd %>" | |
| DBASE = "log" | |
| TABLE = "apache" | |
| ###################################### | |
| # Other parameters | |
| SKIP_LOCALHOST = true | |
| ###################################### | |
| # Apache Log format vhost | |
| format = '%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" - %{Host}i' | |
| ###################################### | |
| # Class extensions | |
| class String | |
| # cleanup string for sql query | |
| def cleanup | |
| self.gsub(/[\[\]\"'\\]/, '') | |
| end | |
| # resolve hostname of a given ip | |
| def hostname | |
| Resolv.getname(self).to_s rescue "" | |
| end | |
| # convert apache log time format "29/May/2009:03:00:37 +0200" | |
| # to sql time "2007-11-18 15:58:36" | |
| def sql_date | |
| self[11]= " " | |
| pd = ParseDate.parsedate(self) | |
| "#{pd[0]}-#{pd[1].two_digits}-#{pd[2].two_digits} #{pd[3].two_digits}:#{pd[4].two_digits}:#{pd[5].two_digits}" | |
| end | |
| def ipnum | |
| a = self.split(".") | |
| 16777216*a[0].to_i + 65536*a[1].to_i + 256*a[2].to_i + a[3].to_i | |
| end | |
| end | |
| class Fixnum | |
| def two_digits | |
| ("0" + self.to_s)[-2..-1] | |
| end | |
| end | |
| ###################################### | |
| # Script method | |
| def separate(logline) | |
| ip = logline["%{X-Forwarded-For}i"] | |
| return if (ip == "127.0.0.1" || ip == "-" || ip == "::1") && @skip_localhost | |
| host = @previp == ip ? @prevhn : ip.hostname | |
| @previp = ip | |
| @prevhn = host | |
| identd = logline["%l"] | |
| userid = logline["%u"] | |
| rtime = logline["%t"].cleanup | |
| stime = rtime.sql_date | |
| request = logline["%r"].cleanup | |
| state = logline["%>s"] | |
| bytes = logline["%b"] | |
| referer = logline["%{Referer}i"].cleanup | |
| agent = logline["%{User-Agent}i"].cleanup | |
| vhost = logline["%{Host}i"].cleanup | |
| country = country_query(ip) | |
| country_code = country[0] | |
| country_name = country[1] | |
| # create and execute query | |
| dbquery ="INSERT INTO #{@table} (ip, host, identd, userid, time, request, state, bytes, referer, agent, vhost, country_code, country_name, created_at) VALUES ('#{ip}', '#{host}', '#{identd}', '#{userid}', '#{stime}', '#{request}', '#{state}', '#{bytes}', '#{referer}', '#{agent}', '#{vhost}', '#{country_code}', '#{country_name}', '#{@log_started}')" | |
| @dbh.query(dbquery) | |
| # puts dbquery | |
| end | |
| def country_query(ip) | |
| begin | |
| result = Array.new | |
| raise if (ip == "127.0.0.1" || ip == "-" || ip == "::1") | |
| num = ip.ipnum | |
| dbquery = "SELECT country, name FROM geoip WHERE #{num.to_s} >= begin_num AND #{num.to_s} <= end_num LIMIT 1" | |
| res = @dbh.query(dbquery) | |
| res.each do |row| | |
| result = [ row[0], row[1] ] | |
| end | |
| rescue | |
| result = [ "", "" ] | |
| end | |
| return result | |
| end | |
| def log_date(date=Time.now) | |
| date.utc.strftime("%Y-%m-%d %H:%M:%S") | |
| end | |
| ###################################### | |
| # Parameters setup | |
| logfile= ARGV.shift | |
| parser = ApacheLogRegex.new(format) | |
| counter = 0 | |
| @previp = "" | |
| @prevhn = "" | |
| @table = TABLE | |
| @skip_localhost = SKIP_LOCALHOST | |
| @log_started = log_date.to_s | |
| ###################################### | |
| # main entry | |
| begin | |
| # verify logfile | |
| # connect to the MySQL server | |
| puts "#{log_date}: Opening mysql connection..." | |
| @dbh = Mysql.real_connect(HOST, UID, PWD, DBASE) | |
| # read logfile | |
| File.readlines(logfile).collect do |line| | |
| begin | |
| counter += 1 | |
| l = parser.parse(line) | |
| separate(l) if l | |
| rescue ApacheLogRegex::ParseError => e | |
| nil | |
| end | |
| end | |
| puts "#{log_date}: Parsed #{counter.to_s} lines..." | |
| rescue Mysql::Error => e | |
| puts "Error code: #{e.errno}" | |
| puts "Error message: #{e.error}" | |
| puts "Error SQLSTATE: #{e.sqlstate}" if e.respond_to?("sqlstate") | |
| ensure | |
| # disconnect from server | |
| @dbh.close if @dbh | |
| puts "#{log_date}: Closing mysql connection..." | |
| puts "---------------------------" | |
| end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment