Skip to content

Instantly share code, notes, and snippets.

@zakx
Created June 19, 2014 12:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zakx/0ac8ac66bf2def3dda5d to your computer and use it in GitHub Desktop.
Save zakx/0ac8ac66bf2def3dda5d to your computer and use it in GitHub Desktop.
irssi log format to CSV converter
# encoding: utf-8
"""
not handled:
* nick changes
* topic changes
* mode changes
"""
import csv
import datetime
import glob
import logging
import re
import sys
# Configure the root logger: DEBUG level, "[LEVEL] message" output format.
logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.DEBUG)
# Patterns below are applied to the event text of a log line, i.e. everything
# after the leading "HH:MM " timestamp (see parse_line).
# Channel message: "<nick> message".
RE_PRIVMSG = re.compile(r'^<(?P<nick>[^\>]+)> (?P<message>.*)$')
# Join/part/quit: " nick (user@host) has joined|left|quit ", optionally
# followed by the channel name and an optional, optionally parenthesised,
# trailing message (e.g. a quit reason).
# NOTE(review): the channel name "#nodrama.de" is hard-coded and its "." is
# unescaped, so it matches any character there.
RE_JPQ = re.compile(r'^ (?P<nick>[^\>]+) \((?P<host>.+\@.+)\) has (?P<action>(joined|left|quit)) (?:#nodrama.de\s?)?(?:\(?(?P<message>.+?)?\)?)$')
# Maps the verb captured by RE_JPQ's "action" group to the emitted event name.
JPQ_EVENTS = {
'joined': 'JOIN',
'left': 'PART',
'quit': 'QUIT'
}
# "/me" action: " * nick message" (nick is a run of non-whitespace characters).
RE_ACTION = re.compile(r'^ \* (?P<nick>[^\s]+) (?P<message>.*)$')
# Notice: "-nick...- message". The nick stops at the first "|" or "-"; at
# least one further non-dash character must precede the closing "- ".
# NOTE(review): because of that mandatory extra character, a line of the form
# "-nick- text" yields the nick minus its last character -- confirm the
# notice format this was written for.
RE_NOTICE = re.compile(r'^\-(?P<nick>[^\|-]*)[^-]+?- (?P<message>.*)$')
# Log file path "logs/YYYY-MM-DD.txt"; captures year, month and day as strings.
# NOTE(review): the "." before "txt" is unescaped and matches any character.
RE_FILE = re.compile(r'^logs/(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}).txt$')
def parse_line(line):
    """Parse one raw log line into an event dict.

    A line is expected to look like ``HH:MM <event text>``: the hour sits at
    offsets 0-1, the minute at offsets 3-4 and the event text starts at
    offset 6.  The first character of the event text selects the parser:
    ``<`` for PRIVMSG, a space for ACTION or JOIN/PART/QUIT, ``-`` for NOTICE.

    Args:
        line: One line as read from a log file (trailing newline allowed).

    Returns:
        A dict with the keys ``hour``, ``minute``, ``event``, ``nick``,
        ``host`` and ``message``.  ``host`` is ``None`` for every event
        except JOIN/PART/QUIT.  ``None`` is returned for client comments
        (``--- ...``) and for lines that cannot be parsed; the latter are
        reported through ``logging.error`` and never abort the run.
    """
    # Skip client comments
    if line.startswith('--- '):
        return None

    # Parse time
    try:
        hour = int(line[0:2])
        minute = int(line[3:5])
    except ValueError:
        logging.error("Faulty time parsing for line: [%s]", line)
        return None

    # remove time and newline from line
    line = line.strip('\n').strip('\r')
    content = line[6:]

    # Slicing instead of indexing: a line that ends right after the timestamp
    # gives an empty marker and falls through to the generic error below
    # rather than raising IndexError.
    marker = content[:1]

    if marker == '<':
        # handle PRIVMSG
        stage = event = 'PRIVMSG'
        match = RE_PRIVMSG.match(content)
    elif marker == ' ':
        # handle "/me PRIVMSG" (we call it ACTION) and JOIN/PART/QUIT.
        # Let RE_ACTION itself decide whether this is an action: a fixed
        # character offset can disagree with the pattern's own prefix.
        stage = event = 'ACTION'
        match = RE_ACTION.match(content)
        if match is None:
            stage = 'JPQ'
            match = RE_JPQ.match(content)
            if match is not None:
                event = JPQ_EVENTS[match.group('action')]
    elif marker == '-':
        # handle NOTICE
        stage = event = 'NOTICE'
        match = RE_NOTICE.match(content)
    else:
        logging.error("Failed parsing [%s]", line)
        return None

    if match is None:
        # One malformed line must not abort the whole conversion.
        logging.error("Failed to match in %s: [%s]", stage, line)
        return None

    data = match.groupdict()
    return {
        'hour': hour,
        'minute': minute,
        'event': event,
        'nick': data['nick'],
        # Only RE_JPQ defines a "host" group; every other event has no host.
        'host': data.get('host'),
        'message': data.get('message'),
    }
def build_csv_line(year, month, day, hour, minute, event, nick, host=None, message=None):
    """Assemble one CSV row for a parsed log event.

    Args:
        year, month, day: Date components; anything ``int()`` accepts
            (the script passes the strings captured from the file name).
        hour, minute: Time components as integers.
        event: Event name, e.g. ``'PRIVMSG'`` or ``'JOIN'``.
        nick: Nick the event belongs to.
        host: Optional ``user@host``; ``None`` becomes an empty string.
        message: Optional message text; ``None`` becomes an empty string.

    Returns:
        A 10-tuple ``(year, month, day, hour, minute, ts, event, nick, host,
        message)`` where ``ts`` is the ``YYYY-MM-DD HH:MM:SS`` timestamp.

    Raises:
        ValueError: If the date/time components do not form a valid datetime.
    """
    if host is None:
        host = ""
    if message is None:
        message = ""
    timestamp = datetime.datetime(int(year), int(month), int(day), hour, minute)
    return (
        year, month, day, hour, minute,
        timestamp.isoformat(' '),
        event, nick, host, message,
    )
# Convert every daily log under logs/ into rows and write them to parsed.csv.
csv_data = []
# glob returns names in arbitrary order; the YYYY-MM-DD file names sort
# chronologically, so sorting keeps the CSV rows in date order.
for logfile in sorted(glob.glob("logs/*.txt")):
    print(logfile)
    match = RE_FILE.match(logfile)
    if match is None:
        # A stray file without a date in its name: skip it, do not crash.
        logging.warning("Skipping [%s]: name is not logs/YYYY-MM-DD.txt", logfile)
        continue
    ymd = match.groupdict()
    logging.info("Parsing %(year)s-%(month)s-%(day)s" % ymd)
    with open(logfile, "r") as f:
        for line in f:
            transfer = parse_line(line)
            if transfer is None:
                # comment or unparseable line
                continue
            # Merge the date fields with the parsed event fields.
            fields = dict(ymd)
            fields.update(transfer)
            csv_data.append(build_csv_line(**fields))

with open("parsed.csv", "w") as o:
    writer = csv.writer(o, delimiter=";")
    writer.writerow(("year", "month", "day", "hour", "minute", "ts", "event", "nick", "host", "message"))
    writer.writerows(csv_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment