Skip to content

Instantly share code, notes, and snippets.

@guychouk
Last active September 19, 2022 05:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guychouk/8663b49b9b9f7d71e70b5021d5ac1376 to your computer and use it in GitHub Desktop.
Save guychouk/8663b49b9b9f7d71e70b5021d5ac1376 to your computer and use it in GitHub Desktop.
Insert MSN Messenger chat XML files into a SQLite DB.
'''
This script looks for XML files in a directory called "chats"
in the current working directory and parses the messages they contain.
Once the parsing is done, it creates an SQLite DB file
("output/msn-chats.db") and a "chats" table for storing the date,
from, to, text and style attributes of the messages.
'''
import os
import sys
import csv
import sqlite3
import xml.etree.ElementTree as ET
# Switch stdout to UTF-8 (presumably so that printing chat text or error
# messages with non-ASCII characters does not depend on the console's
# default encoding).
sys.stdout.reconfigure(encoding='utf-8')
def parse_chat_xml_to_list(xml_file):
    """Parse one chat log XML file into a list of message rows.

    Args:
        xml_file: Name of the XML file, resolved relative to the "chats"
            directory located next to this script. An absolute path is
            used as-is, because os.path.join discards the preceding
            components when a later one is absolute.

    Returns:
        A list containing one ``[date_time, from_user, to_user, text, style]``
        list for every ``Message`` element directly under the XML root.
        ``text`` is None when the ``Text`` element has no text, and
        ``style`` is '' when the ``Text`` element has no ``Style`` attribute.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        KeyError: If a ``DateTime`` or ``FriendlyName`` attribute is missing.
        AttributeError: If a message lacks a From/To/User/Text element.
    """
    chats_dir = os.path.join(os.path.dirname(__file__), 'chats')
    root = ET.parse(os.path.join(chats_dir, xml_file)).getroot()

    messages = []
    for message in root.findall('Message'):
        text_node = message.find('Text')
        messages.append([
            message.attrib['DateTime'],
            message.find('From').find('User').attrib['FriendlyName'],
            message.find('To').find('User').attrib['FriendlyName'],
            text_node.text,
            # Element.attrib is a plain dict, so the style has to be looked
            # up as a key; hasattr() only inspects object attributes and
            # therefore never found it, leaving the style always empty.
            text_node.attrib.get('Style', ''),
        ])
    return messages
def generate_sqlite_db_from_chats(chats):
    """Create the "chats" table in output/msn-chats.db and insert the rows.

    Args:
        chats: Iterable of message rows; the first five items of each row
            are stored as msg_date, msg_from, msg_to, msg_text and msg_style.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement
            fails, e.g. because the "chats" table already exists from a
            previous run. Nothing is committed in that case.
    """
    # sqlite3.connect() creates the database file but not its parent
    # directory, so make sure "output" exists before connecting.
    os.makedirs("output", exist_ok=True)
    con = sqlite3.connect("output/msn-chats.db")
    try:
        # Using the connection as a context manager commits on success and
        # rolls back if any statement raises.
        with con:
            con.execute(
                "CREATE TABLE chats (id INTEGER PRIMARY KEY AUTOINCREMENT, msg_date, msg_from, msg_to, msg_text, msg_style);")
            con.executemany(
                "INSERT INTO chats (msg_date, msg_from, msg_to, msg_text, msg_style) VALUES (?, ?, ?, ?, ?);",
                (tuple(row[:5]) for row in chats))
    finally:
        # Always release the file handle, whether or not the insert worked.
        con.close()
def main():
    """Parse every ``.xml`` file in ./chats and store the messages in SQLite.

    The "chats" directory is looked up relative to the current working
    directory. If any file fails to parse, the error is printed and the
    script exits with status 1 without creating the database.
    """
    chats_data = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # insertion order (and therefore the row ids) reproducible between runs.
    for file_name in sorted(os.listdir("chats")):
        if not file_name.endswith(".xml"):
            continue
        # The parser joins its argument onto the script's own directory.
        # Passing an absolute path overrides that, so the file that was
        # listed here is the one that gets parsed even when the script is
        # started from a different working directory.
        xml_path = os.path.abspath(os.path.join("chats", file_name))
        try:
            chats_data.extend(parse_chat_xml_to_list(xml_path))
        except Exception as e:
            print(e)
            # A bare sys.exit() would report success (status 0) to the shell.
            sys.exit(1)
    generate_sqlite_db_from_chats(chats_data)
# Run the import only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment