Skip to content

Instantly share code, notes, and snippets.

@kadin2048
Created November 3, 2021 05:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kadin2048/ad0feef9f4f4230cd207907eceb17452 to your computer and use it in GitHub Desktop.
Save kadin2048/ad0feef9f4f4230cd207907eceb17452 to your computer and use it in GitHub Desktop.
Turn a Pidgin HTML chatlog into a Thunderbird-compatible .eml file so that it can be imported into IMAP for archive purposes.
#!/usr/bin/env python
# Turn a Pidgin HTML chatlog into a Thunderbird-compatible .eml file
# so that it can be imported into Gmail for archive purposes.
#
# Syntax: $ python pidgintoeml.py pidginlogfile.html [outputfilename.eml]
#
# Version: 2011-09-28
#
import sys
import datetime
import os.path
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
# Debug flag; set to True to print progress information to stdout,
# or to False to suppress it. The functions below only read this flag,
# so no `global` statement is needed anywhere.
debug = False
def main():
    """Convert the chat log named on the command line into an .eml file.

    Reads sys.argv: the first argument is the input log file, the optional
    second argument is the output file (default: the input name + '.eml').

    The message is built completely in memory before the output file is
    created, so a failed run never leaves an empty output file behind
    (which would make the next run stop with "already exists").

    Returns:
        0 on success, 1 on any error (a message is written to stderr).
    """
    # Use the first argument as the input file; a missing or empty
    # argument is treated the same way.
    args = sys.argv[1:]
    if not args or not args[0]:
        sys.stderr.write("No input file specified.\n")
        return 1
    filename = args[0]

    # Second arg, if present, is the output file
    outfilename = args[1] if len(args) > 1 else filename + '.eml'

    # DEBUG
    if debug:
        print("-- Reading from " + filename)
        print("-- Writing to " + outfilename)

    # Test to see if the output file already exists (processed already)
    if os.path.isfile(outfilename):
        sys.stderr.write("Output file " + outfilename + " already exists. Terminating.\n")
        return 1

    extension = os.path.splitext(filename)[1]

    # Reject unsupported input before touching any file.
    if extension == '.chatlog':
        # For XML formatted chat logs
        sys.stderr.write("XML chatlog processing not implemented. Terminating.\n")
        return 1

    # Read the input; `with` guarantees the handle is closed again.
    # Only HTML logs are actually read, other inputs merely have to open.
    try:
        with open(filename, 'r') as fi:
            content = fi.read() if extension == '.html' else ''
    except IOError:
        sys.stderr.write("IO Error while opening files.\n")
        return 1

    # Create a message object
    msg_base = MIMEMultipart('mixed')

    if extension == '.html':
        # For probable Pidgin logs (ending in .html)...
        # Process the first line of the input file (including its newline,
        # if any) to determine the eml headers.
        firstline = content[:content.find('\n') + 1] or content
        try:
            determineHTMLLogHeaders(firstline, msg_base)
        except ValueError as err:
            sys.stderr.write("Could not parse log header: " + str(err) + "\n")
            return 1
        if debug:
            print("-- Headers after parsing first line are...")
            for key, value in msg_base.items():
                print(key + ": " + value)

        # Create the HTML payload using the entire file
        doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n'
        ht = doctype + content
        if debug:
            print("-- HTML body is of type:")
            print(type(ht))
        # NOTE(review): no charset is passed here, so the email package's
        # default applies -- confirm this suits logs with non-ASCII text.
        msghtml = MIMEText(ht, 'html')
        # Attach the HTML to the root
        msg_base.attach(msghtml)

    # Any other extension falls through and produces a message with no
    # headers and no body part.

    if debug:
        print("-- Ready to flatten and write message...")

    # Write out the message; the output file is only created at this point.
    try:
        with open(outfilename, 'w') as fo:
            fo.write(msg_base.as_string())
    except IOError:
        sys.stderr.write("IO Error while opening files.\n")
        return 1

    if debug:
        print("-- Complete.")
    return 0
def determineHTMLLogHeaders(firstline, msg_base, timezone="-0500 (EST)"):
    """Set the From, To, Date and Subject headers from a log's first line.

    The <title> element inside *firstline* is expected to look like::

        Conversation with <buddy> at <MM/DD/YYYY HH:MM:SS AM|PM> on <account>

    Args:
        firstline: First line of the HTML log, containing the <title> element.
        msg_base: Message object whose headers are set in place. All parsing
            is done before the first header is written, so the object is
            left untouched when a ValueError is raised.
        timezone: Zone text appended to the Date header. The title carries
            no zone information, so this is an assumption supplied by the
            caller; the default keeps the historical fixed EST offset.

    Raises:
        ValueError: If the <title> element is missing, the title does not
            follow the layout above, or the date cannot be parsed.
    """
    # Start by just looking at the <title> element
    open_tag = "<title>"
    start = firstline.find(open_tag)
    end = -1
    if start != -1:
        end = firstline.find("</title>", start + len(open_tag))
    if start == -1 or end == -1:
        raise ValueError("no <title> element found in first line of log")
    title = firstline[start + len(open_tag):end]
    if debug:
        print("<title>: " + title)

    # Split the title at its three fixed markers; an empty separator from
    # str.partition means the marker was not found.
    lead, marker, remainder = title.partition("Conversation with ")
    sender, at_sep, remainder = remainder.partition(" at ")
    logdate, on_sep, recipient = remainder.partition(" on ")
    if not (marker and at_sep and on_sep):
        raise ValueError("unexpected log title format: " + repr(title))

    # Turn the date into a datetime object (raises ValueError on mismatch)
    d = datetime.datetime.strptime(logdate, '%m/%d/%Y %I:%M:%S %p')

    # The 'From' address of the chat
    msg_base['From'] = sender
    if debug:
        print("-- From is: " + msg_base['From'])

    # The 'To' address
    msg_base['To'] = recipient
    if debug:
        print("-- To is: " + msg_base['To'])

    # Write the date out in RFC822 format. The zone text is appended after
    # strftime so that it is never interpreted as a format string.
    msg_base['Date'] = d.strftime("%a, %d %b %Y %H:%M:%S") + " " + timezone

    # And the message subject: everything in the title before " on "
    msg_base['Subject'] = lead + marker + sender + at_sep + logdate
# Run only when executed as a script; main()'s return value becomes the
# program's exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@kadin2048
Copy link
Author

Please note this is for Python 2.x and will not work on Python 3 without some modifications.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment