|
#!/usr/bin/env python |
|
|
|
# Take an IBM Sametime HTML log file, determine the date, and create |
|
# an RFC-compliant email message from it, for importation into an MUA |
|
# |
|
# Usage: sametimetoeml.py inputfile.html |
|
# Where inputfile.html is a Sametime log located in a dated folder |
|
# (See readme for more useful suggestions.) |
|
# |
|
# Written for Python 2.6 |
|
|
|
|
|
import sys |
|
import os |
|
import dateutil.parser |
|
import time |
|
|
|
from email.MIMEMultipart import MIMEMultipart |
|
from email.MIMEText import MIMEText |
|
|
|
import xml.etree.ElementTree |
|
|
|
# Program-wide variables

# Debug toggle: when True, the script narrates each conversion step on
# stdout.  (A module-level assignment is already global, so no ``global``
# statement is needed here.)
debug = False
|
|
|
|
|
# Class definition |
|
class Chatlog:
    """Chatlog class, for holding chat log and metadata during conversion.

    Attributes are filled in by the ``set*`` methods, not the constructor:

    * ``html`` / ``tree`` -- raw file text and the parsed ElementTree root
      (set by ``setHTML``).
    * ``buddyname`` -- name of the remote party (set by ``setBuddyName``).
    * ``contenttype``, ``datetimestr``, ``datetime``, ``isotime``,
      ``username`` -- values copied from meta tags by ``setMetatagdata``;
      each one exists only if the corresponding tag was found.
    """

    def __init__(self, filepath, logtype):
        """Remember where the log lives and which format it is in.

        filepath -- path of the log file (main() passes an absolute path).
        logtype  -- format identifier; only "html" is acted on by setHTML().
        """
        self.filepath = filepath
        self.logtype = logtype

    def setHTML(self):
        """Read the log into ``self.html`` and parse it into ``self.tree``.

        Does nothing unless ``self.logtype`` is "html".  Errors raised by
        open()/read() or by the XML parser propagate to the caller.
        """
        if self.logtype != "html":
            return

        # 'with' closes the handle even when read() fails; the original
        # left the file open for the life of the process.
        with open(self.filepath, 'r') as infile:
            self.html = infile.read()

        # Then parse it using ElementTree
        self.tree = xml.etree.ElementTree.fromstring(self.html)

    def setBuddyName(self):
        """Derive ``self.buddyname`` from the location of the log file.

        The remote buddy's name should always be the grandparent folder,
        i.e. the third path component from the end.  Raises IndexError when
        the path has fewer than three components.
        """
        self.buddyname = self.filepath.split(os.sep)[-3]

    def setMetatagdata(self):
        """Copy the interesting meta tag values from ``self.tree`` onto self.

        Recognised tags (matched on their attributes):

        * http-equiv="Content-Type"     -> ``contenttype``
        * name="sametime:creationTime"  -> ``datetimestr``, ``datetime``,
          ``isotime``
        * name="sametime:username"      -> ``username``

        Tags without a ``content`` attribute are ignored.  Requires
        setHTML() to have populated ``self.tree`` first.  Whatever
        dateutil raises for an unparseable creation time propagates.
        """
        # Element.getiterator() is all Python 2.6 offers, but it was removed
        # in Python 3.9; prefer Element.iter() wherever it exists.
        find_all = getattr(self.tree, "iter", None) or self.tree.getiterator

        for element in find_all("meta"):
            attrs = element.attrib
            content = attrs.get('content')
            if content is None:
                # Nothing to copy from this tag
                continue

            if attrs.get('http-equiv') == 'Content-Type':
                self.contenttype = content
                if debug:
                    sys.stdout.write("HTTP Content-Type is: " + self.contenttype + "\n")

            name = attrs.get('name')
            if name == "sametime:creationTime":
                self.datetimestr = content
                if debug:
                    sys.stdout.write("Creation time is " + self.datetimestr + "\n")

                # Fuzzy parsing is used instead of slicing fixed character
                # offsets, which only works when the time includes seconds.
                self.datetime = dateutil.parser.parse(self.datetimestr, fuzzy=True)
                self.isotime = self.datetime.isoformat()

                if debug:
                    sys.stdout.write("Pretty date is: " + self.datetime.strftime("%a, %d %b %Y %H:%M:%S") + "\n")
            elif name == 'sametime:username':
                self.username = content
                if debug:
                    sys.stdout.write("Sametime username is " + self.username + "\n")
|
|
|
|
|
|
|
# Processing work |
|
def _charset_from_content_type(contenttype, default="utf-8"):
    """Return the charset parameter of a Content-Type value, or *default*.

    contenttype -- e.g. "text/html; charset=UTF-8"; may be None or empty.
    default     -- returned when no usable charset parameter is present.
    """
    if not contenttype:
        return default
    # Parameters follow the media type, separated by semicolons
    for param in contenttype.split(';')[1:]:
        key, sep, value = param.partition('=')
        if sep and key.strip().lower() == 'charset':
            value = value.strip().strip('"\'')
            if value:
                return value
    return default


def main():
    """Convert the Sametime HTML log named on the command line to .eml.

    sys.argv[1] is the input log and must end in "html".  sys.argv[2], if
    present, is the output file; otherwise the message is written to the
    current directory as "<buddy> (<creation time>).eml".

    Returns 0 on success, or 1 when the arguments are unusable or the log
    lacks the creation-time / username metadata needed for the headers.
    """
    # First argument is the input file
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: sametimetoeml.py inputfile.html [outputfile.eml]\n")
        return 1
    infilename = sys.argv[1]

    # Make sure infilename at least ends in html before processing it.
    # The reason is reported unconditionally so the failure is never silent.
    if not infilename.endswith("html"):
        sys.stderr.write("Input filename does not end with html\n")
        return 1  # exit with error

    if debug:
        sys.stdout.write("Input filename: " + infilename + "\n")

    # Then get the path
    filepath = os.path.abspath(infilename)  # filepath is a string
    if debug:
        sys.stdout.write("Input path is: " + str(filepath) + "\n")
        sys.stdout.write("Directory path separator char is: " + str(os.sep) + "\n")

    # instantiate Chatlog object
    if debug:
        sys.stdout.write("Instantiating chatlog object...\n")
    chatlog = Chatlog(filepath, "html")

    # read file contents into memory
    if debug:
        sys.stdout.write("Reading file contents...\n")
    chatlog.setHTML()

    # get the remote buddy's name from the path
    if debug:
        sys.stdout.write("Determining buddy name from path...\n")
    chatlog.setBuddyName()
    if debug:
        sys.stdout.write("Buddy name is: " + str(chatlog.buddyname) + "\n")

    # Parse the HTML and set other metadata values
    if debug:
        sys.stdout.write("Set meta tag values from HTML...\n")
    chatlog.setMetatagdata()

    # The headers below cannot be built without these two values; say so
    # plainly instead of dying later with an AttributeError traceback.
    missing = [attr for attr in ("datetime", "username")
               if getattr(chatlog, attr, None) is None]
    if missing:
        sys.stderr.write("Missing metadata in " + infilename + ": " + ", ".join(missing) + "\n")
        return 1

    # create message object for the output
    msg_base = MIMEMultipart('mixed')

    # set message headers
    msg_base['Subject'] = "Sametime with " + chatlog.buddyname
    # %z adds the UTC offset when the parsed datetime is timezone-aware and
    # expands to nothing when it is naive; an RFC-compliant Date header
    # needs the offset.
    msg_base['Date'] = chatlog.datetime.strftime("%a, %d %b %Y %H:%M:%S %z").strip()
    msg_base['From'] = chatlog.buddyname  # TODO: set this to the chat originator
    msg_base['To'] = chatlog.username  # TODO: set this to username unless username == originator, in which case buddyname
    #msg_base['X-Original-Filename'] = infilename
    msg_base['X-Converted-On'] = time.strftime("%a, %d %b %Y %H:%M:%S")  # timezones are hard...

    if debug:
        # sys.stdout.write matches the rest of the file and, unlike the
        # print statement, is valid syntax on both Python 2 and 3.
        sys.stdout.write("-- Headers after parsing first line are...\n")
        for key, value in msg_base.items():
            sys.stdout.write(key + ": " + value + "\n")

    # create message content; take the encoding (probably UTF-8) from the
    # HTML content-type header, falling back to UTF-8 when it is absent
    encoding = _charset_from_content_type(getattr(chatlog, 'contenttype', None))
    content = MIMEText(chatlog.html, 'html', encoding)

    msg_base.attach(content)

    # Second arg, if present, is the output file
    if len(sys.argv) > 2:
        outfilename = sys.argv[2]
    else:
        # default output is to cwd, named after the buddy and creation time
        outfilename = os.path.join(
            os.getcwd(),
            chatlog.buddyname + ' (' + chatlog.datetime.strftime("%Y-%m-%dT%H%M") + ').eml')

    if debug:
        sys.stdout.write("Output file is: " + outfilename + "\n")

    # 'with' flushes and closes the output even if the write fails
    with open(outfilename, 'w') as fo:
        fo.write(msg_base.as_string())

    return 0
|
|
|
|
|
|
|
# If run as standalone, execute main loop |
|
# Script entry point: when executed directly (not imported), run main()
# and hand its return value to the shell as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)