Created
September 25, 2008 23:10
-
-
Save anonymous/12978 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os | |
import psycopg2 | |
import re | |
import time | |
import psyco | |
psyco.full() | |
connection = psycopg2.connect("dbname='testing' user='test' host='localhost' password='pass'"); | |
cursor = connection.cursor() | |
file_list = os.listdir("logs") | |
def find_date(date): | |
cursor.execute("SELECT * FROM days WHERE date = '%s'" % time.strftime("%d-%m-%Y", date)) | |
return cursor.fetchone() | |
def find_hostname(hostname): | |
cursor.execute("SELECT * FROM hostnames WHERE hostname = '%s'" % hostname) | |
return cursor.fetchone() | |
def find_person(name): | |
name = name.replace("\\", "\\\\") | |
cursor.execute("SELECT * FROM people WHERE name = '%s'" % name) | |
return cursor.fetchone() | |
def log_hostname(name, person_id): | |
hostname = find_hostname(name) | |
if hostname == None: | |
cursor.execute("INSERT INTO hostnames (hostname) VALUES ('%s')" % name) | |
connection.commit() | |
hostname = find_hostname(name) | |
hostname_id = str(hostname[0]) | |
cursor.execute("SELECT * FROM hostnames_people WHERE hostname_id = '%s' AND person_id = '%s'" % (hostname_id, person_id)) | |
join = cursor.fetchone() | |
if join == None: | |
cursor.execute("INSERT INTO hostnames_people (hostname_id, person_id) VALUES ('%s', '%s')" % (hostname_id, person_id)) | |
connection.commit() | |
return hostname[0] | |
def log_date(date): | |
d = find_date(date) | |
if d == None: | |
cursor.execute("INSERT INTO days (date) VALUES ('%s')" % time.strftime("%d-%m-%Y", date)) | |
connection.commit() | |
def log_person(name): | |
name = re.sub('<', '', name) | |
name = re.sub('>', '', name) | |
person = find_person(name) | |
if person == None: | |
cursor.execute("INSERT INTO people (name) VALUES ('%s')" % name.replace("\\", "\\\\")) | |
connection.commit() | |
person = find_person(name) | |
return str(person[0]) | |
def time_to_string(time): | |
return time.strftime("%d-%m-%Y %H:%M", t) | |
def sanitize(string): | |
return unicode(" ".join(string).replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'"), "iso-8859-1") | |
channel = '#rubyonrails' | |
for file in file_list: | |
if os.path.isdir(file) == False: | |
f = open(os.path.join("logs", file)) | |
print file | |
date = re.sub('ror.log.','', file) | |
lines = f.readlines() | |
line_count = 0 | |
time_taken = time.time() | |
log_date(time.strptime(date, "%Y%m%d")) | |
for line in lines: | |
line_count += 1 | |
if line_count % 100 == 0: | |
print "100 lines done in: " + str(time.time() - time_taken) | |
time_taken = time.time() | |
l = re.sub(r'\n$', '', line).split(" ") | |
t = time.strptime(date + " " + re.sub(r'\[|\]','',l[0]), "%Y%m%d %H:%M") | |
# when a person does /me | |
if l[1] == "Action:": | |
person_id = log_person(l[2]) | |
message = l[3:len(l)] | |
cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel, message) VALUES ('%s', 'part', '%s' , '%s', '%s')" % (person_id, time_to_string(time), channel, sanitize(message))) | |
# when a person parts.. | |
elif len(l) >= 5 and (l[4] == channel or l[4] == channel + "." or l[1] == channel or l[4] == "irc:") and re.match(r'<', l[1]) == None: | |
log_hostname(l[2], person_id) | |
if l[4] == "irc:": | |
person_id = log_person(l[1]) | |
cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel) VALUES ('%s', 'quit', '%s', '%s')" % (person_id, time_to_string(time), channel)) | |
if l[3] == 'left' and l[4] == channel: | |
person_id = log_person(l[1]) | |
cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel) VALUES ('%s', 'part', '%s' , '%s')" % (person_id, time_to_string(time), channel)) | |
if l[3] == 'joined': | |
person_id = log_person(l[1]) | |
cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel) VALUES ('%s', 'join', '%s' , '%s')" % (person_id, time_to_string(time), channel)) | |
elif " ".join(l[2:4]) == "kicked from " + channel: | |
person_id = log_person(l[1]) | |
other_person_id = log_person(l[6]) | |
cursor.execute("INSERT INTO chats (person_id, other_person_id, message_type, created_at, channel, message) VALUES ('%s','%s', 'kick', '%s' , '%s', '%s')" % (person_id, other_person_id, time_to_string(time), channel, sanitize(t[7:len(t)]))) | |
elif l[1] == channel + ":" and " ".join(l[2:3]) == "mode change": | |
person_id = log_person(l[7].split('!')[0]) | |
cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel, message) VALUES ('%s', 'mode', '%s' ,'%s', '%s')" % (person_id, time_to_string(time), channel, t[4:5])) | |
elif " ".join(l[1:2]) == "Nick change:": | |
person_id = log_person(l[3]) | |
other_person_id = log_person(l[5]) | |
cursor.execute("INSERT INTO chats (person_id, other_person_id, message_type, created_at, channel) VALUES ('%s','%s', 'nick-change','%s' , '%s')" % (person_id, other_person_id, time_to_string(time), channel)) | |
else: | |
person_id = log_person(l[1]) | |
cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel, message) VALUES ('%s', 'message', '%s' , '%s', '%s')" % (person_id, time_to_string(time), channel, sanitize(l[2:len(l)]))) | |
cursor.execute("UPDATE people SET chats_count = chats_count + 1 WHERE id = '%s'" % person_id) | |
connection.commit() | |
connection.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment