Skip to content

Instantly share code, notes, and snippets.

@nakajijapan
Created February 19, 2013 02:21
Show Gist options
  • Save nakajijapan/4982564 to your computer and use it in GitHub Desktop.
Save nakajijapan/4982564 to your computer and use it in GitHub Desktop.
./logs/ディレクトリの中にあるaccess_logのデータをDBに入れるだけの処理
# ./logs/ディレクトリの中にあるaccess_logのデータをDBに入れるだけの処理
require 'mysql2'
require 'active_record'
#
# query
#
# Runs one SQL statement against the local "web_analyze" MySQL database on a
# short-lived connection and returns whatever Mysql2::Client#query returns.
#
# @param sql [String] the SQL statement to execute
# @return [Object] the result of Mysql2::Client#query for +sql+
# @raise whatever Mysql2 raises on connection or query failure (not rescued here)
def query(sql)
  my = Mysql2::Client.new(:host => "localhost", :username => "root", :password => '', :database => 'web_analyze')
  my.query("set character set utf8")
  my.query(sql)
ensure
  # Close the connection even when a query raises, so a failing statement
  # does not leak an open client. `my` is nil if the connect itself failed.
  my.close if my
end
# Connection settings for the local "web_analyze" MySQL database
# (mysql2 adapter, root user, empty password).
connection_settings = {
  adapter: "mysql2",
  host: "localhost",
  username: "root",
  password: "",
  database: "web_analyze"
}

# Point every ActiveRecord model in this script at that database.
ActiveRecord::Base.establish_connection(connection_settings)
# ActiveRecord model used by the import loop below to store one row per
# parsed access-log line. The loop assigns: domain, ip, access_time,
# http_method, http_code, url, size, user_agent and uid.
# No table name is set here, so ActiveRecord derives it from the class name
# (presumably `access_logs` -- confirm against the actual schema).
class AccessLogs < ActiveRecord::Base
end
#--------------------------------------------------------
# main
#--------------------------------------------------------
# Imports every file in LOG_DIR whose name contains "access.log" into the
# database, one AccessLogs row per log line (requests for static assets and
# blank lines are skipped).

# Directory that is scanned for access-log files.
LOG_DIR = './logs'

# Matches a URL ending in a static-asset extension, optionally followed by a
# query string. Anchored so that e.g. ".json" or ".jsp" are NOT treated as ".js".
STATIC_ASSET_PATTERN = /\.(js|css|jpe?g|png|gif)(\?|\z)/

# UTC offset assumed when a log line carries no parsable timezone field.
DEFAULT_UTC_OFFSET = '+09:00'

# Tells whether a request URL points at a static asset that should be ignored.
#
# @param url [String, nil] the request path taken from the log line
# @return [Boolean]
def static_asset?(url)
  !(STATIC_ASSET_PATTERN =~ url).nil?
end

# Builds the access time from the two timestamp fields of a log line.
#
# @param date_field [String, nil] e.g. "[30/Jan/2013:04:57:56"
# @param zone_field [String, nil] e.g. "+0900]"
# @return [String, nil] Time#to_s output such as "2013-01-30 04:57:56 +0900",
#   or nil (after a warning on stderr) when the timestamp cannot be parsed
def parse_access_time(date_field, zone_field)
  stamp = %r{\A\[?(\d{1,2})/([A-Za-z]{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2})\z}.match(date_field.to_s)
  unless stamp
    warn "error >> unrecognized timestamp: #{date_field.inspect}"
    return nil
  end

  day, month, year, hour, minute, second = stamp.captures
  # Use the offset recorded in the log ("+0900" -> "+09:00") instead of
  # assuming every log was written in +09:00.
  zone = /\A([+-]\d{2}):?(\d{2})\]?\z/.match(zone_field.to_s)
  offset = zone ? "#{zone[1]}:#{zone[2]}" : DEFAULT_UTC_OFFSET

  # Time.new accepts the three-letter English month name directly.
  Time.new(year.to_i, month, day.to_i, hour.to_i, minute.to_i, second.to_i, offset).to_s
rescue ArgumentError => e
  warn "error >> #{e.message}: #{date_field.inspect} #{zone_field.inspect}"
  nil
end

# Turns one access-log line into the attribute hash stored in the database.
#
# Space-separated layout after removing double quotes, e.g.
#   ["www.petit.cc", "86.72.56.101", "-", "-", "[30/Jan/2013:04:57:56", "+0900]",
#    "GET", "/recommend_img/yowayowa.jpg", "HTTP/1.1", "200", "7373", "http://www.ticroixr.com"]
# Quoted fields, e.g.
#   [["GET /muscat2/ HTTP/1.0"], ["-"], ["Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"]]
#
# @param line [String] one raw log line
# @return [Hash, nil] attributes for AccessLogs, or nil when the line is blank
#   or requests a static asset
def parse_access_log_line(line)
  units = line.delete('"').split(' ')
  return nil if units.empty?
  # ignore log
  return nil if static_asset?(units[7])

  # `*` (not `+`) so an empty quoted field ("") keeps the later fields aligned.
  quoted = line.scan(/"([^"]*)"/)
  user_agent = quoted[2] ? quoted[2][0] : '-'
  uid = quoted[4] ? quoted[4][0] : '-'

  {
    domain: units[0],
    ip: units[1],
    access_time: parse_access_time(units[4], units[5]),
    http_method: units[6],
    http_code: units[9].to_i,
    url: units[7],
    size: units[10],
    user_agent: user_agent,
    uid: uid,
  }
end

# Persists one parsed log entry and reports (rather than hides) a failed save.
#
# @param log_info [Hash] attributes as returned by parse_access_log_line
# @return [Boolean] true when the row was saved
def save_access_log(log_info)
  record = AccessLogs.new
  log_info.each { |attribute, value| record.public_send("#{attribute}=", value) }
  return true if record.save

  warn "\nfailed to save #{log_info.inspect}: #{record.errors.full_messages.join(', ')}"
  false
end

# Reads one log file and stores every relevant line, printing progress.
#
# @param path [String] path of the log file to import
# @return [void]
def import_access_log(path)
  lines = File.read(path, :encoding => Encoding::UTF_8).split("\n")
  puts "start lines: #{lines.length}"

  lines.each_with_index do |line, index|
    log_info = parse_access_log_line(line)
    next if log_info.nil?

    save_access_log(log_info)
    # "\r" rewrites the same console line; index is 0-based, so show index + 1.
    STDOUT.write "\r #{index + 1} / #{lines.length}"
  end
end

if Dir.exist?(LOG_DIR)
  Dir.foreach(LOG_DIR) do |file_name|
    next unless file_name.include?('access.log')

    puts "\nanalyzing... #{file_name}"
    import_access_log(File.join(LOG_DIR, file_name))
  end
else
  warn "log directory not found: #{LOG_DIR}"
end
@nakajijapan
Copy link
Author

最近はLTSV(Labeled Tab-separated Values)形式に直してるのでもう利用していない!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment