Skip to content

Instantly share code, notes, and snippets.

@timprepscius
Forked from hastyeagle/export_messages.py
Created July 7, 2016 19:14
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timprepscius/06a73a615228beee0f3c8242ad2eebc2 to your computer and use it in GitHub Desktop.
Save timprepscius/06a73a615228beee0f3c8242ad2eebc2 to your computer and use it in GitHub Desktop.
Export iOS/iMessage chat logs to HTML or text
#!/usr/bin/env python
import sys, getopt
import urllib
import urlparse
import base64
import mimetypes
import cgi
import sqlite3
import os
import errno
from os import path
from shutil import copy2
# TODO:
# - Add auto-linking
# - Export video media
# - Match chat IDs up to names using Contacts.app SQLite db
# Default location of the Messages SQLite database; main() uses it unless
# -f/--file names a different file.
CHAT_DB = path.expanduser("~/Library/Messages/chat.db")
# Directory (relative to the current working directory) that receives copies
# of attachments when exportID() is asked to keep them.
ATTACHDIR = "export-Attachments"
# Apple's epoch starts on January 1st, 2001 for some reason...
# cf. http://apple.stackexchange.com/questions/114168
# EPOCH is added to message.date inside the SQL queries so that SQLite's
# 'unixepoch' modifier can interpret the sum.
EPOCH = 978307200
def list_chats(chatFile):
    """Print every non-archived chat with its message count and date range.

    One line is printed per chat identifier that has at least one message:
    the identifier, the number of messages joined to it, and the local dates
    of its earliest and latest message.

    Args:
        chatFile: Path of the Messages SQLite database to read.
    """
    db = sqlite3.connect(chatFile)
    try:
        cursor = db.cursor()
        print("Below is a list of IDs and their associated message counts:")

        # Materialize the identifiers first so the same cursor can be reused
        # for the per-chat summary query below.
        chat_ids = [row[0] for row in cursor.execute("""
            SELECT DISTINCT chat_identifier
            FROM chat WHERE is_archived = 0 ORDER BY chat_identifier;
        """)]

        for chat_id in chat_ids:
            # Count plus first/last date in a single pass. Every value is
            # bound as a parameter; the identifier is never spliced into the
            # SQL text.
            numRows, firstDate, lastDate = cursor.execute("""
                SELECT COUNT(*),
                       substr(datetime(MIN(m.date) + ?, 'unixepoch', 'localtime'), 0, 11),
                       substr(datetime(MAX(m.date) + ?, 'unixepoch', 'localtime'), 0, 11)
                FROM chat AS c
                INNER JOIN chat_message_join AS cm
                    ON cm.chat_id = c.ROWID
                INNER JOIN message AS m
                    ON m.ROWID = cm.message_id
                WHERE c.chat_identifier = ?;
            """, (EPOCH, EPOCH, chat_id)).fetchone()

            # Chats without any message are not listed.
            if numRows == 0:
                continue

            # %s formatting tolerates a NULL date, which string concatenation
            # would not.
            print(" %s (%d messages, %s to %s)" % (chat_id, numRows, firstDate, lastDate))
    finally:
        db.close()
def _boundary_date_row(cursor, chat_id, newest):
    """Fetch the row holding the local date of a chat's first or last message.

    Returns the ``fetchone()`` result: a one-column row with the date portion
    of the localized timestamp, or ``None`` when the chat has no messages.
    """
    # Only the sort direction is interpolated and it is chosen from two fixed
    # literals; the chat identifier is always bound as a parameter.
    direction = "DESC" if newest else "ASC"
    query = """
        SELECT substr(datetime(m.date + ?, 'unixepoch', 'localtime'), 0, 11)
        FROM chat AS c
        INNER JOIN chat_message_join AS cm
            ON cm.chat_id = c.ROWID
        INNER JOIN message AS m
            ON m.ROWID = cm.message_id
        WHERE c.chat_identifier = ?
        ORDER BY m.date %s LIMIT 1;
    """ % direction
    return cursor.execute(query, (EPOCH, chat_id)).fetchone()


def _group_handles(cursor, chat_id):
    """Return the sorted, distinct handle ids that sent messages in a chat."""
    found = cursor.execute("""
        SELECT DISTINCT h.id
        FROM chat AS c
        INNER JOIN chat_message_join AS cm
            ON cm.chat_id = c.ROWID
        INNER JOIN message AS m
            ON m.ROWID = cm.message_id
        INNER JOIN handle AS h
            ON h.ROWID = m.handle_id
        WHERE c.chat_identifier = ?
        ORDER BY h.id;
    """, (chat_id,))
    return [handle[0] for handle in found]


def _sender_label(row, groupChat, prettyID):
    """Return "me", the sender's handle (group chats) or the pretty ID."""
    # Compare by value: identity comparison against an int literal only works
    # by accident of the interpreter's small-integer cache.
    if row[1] == 1:
        return "me"
    return row[3] if groupChat else prettyID


def _ensure_attachment_dir():
    """Create ATTACHDIR unless it already exists."""
    try:
        os.makedirs(ATTACHDIR)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise


def _attachment_html(attachFilename, attachGUID, attachName, mimeType):
    """Copy one attachment into ATTACHDIR and return the HTML embedding it.

    When the source file is missing, an escaped "not found" notice is
    returned and nothing is copied.
    """
    source = path.expanduser(attachFilename)
    if not path.exists(source):
        return cgi.escape("<Attachment not found: " + attachFilename + ">")

    if mimeType is None:
        mimeType = "unknown"
    # NOTE(review): transfer_name presumably originates from the sender;
    # basename() keeps the copy inside ATTACHDIR whatever it contains. Fall
    # back to the on-disk name when the column is NULL.
    attachName = path.basename(attachName or attachFilename)
    newFile = (ATTACHDIR + "/" + attachGUID + "-" + attachName).replace(" ", "_")
    copy2(source, newFile)

    # Escape everything interpolated into markup; quote=True also escapes
    # double quotes so the values are safe inside attributes.
    label = cgi.escape(attachName)
    nameAttr = cgi.escape(attachName, True)
    srcAttr = cgi.escape(newFile, True)
    typeAttr = cgi.escape(mimeType, True)

    if "video/" in mimeType:
        return ("Video: %s<BR><DIV ALIGN=\"center\">"
                "<video width=\"800\" height=\"540\" controls preload=\"none\">"
                "<source id=\"%s\" src=\"%s\" type=\"%s\">"
                "</video></DIV>") % (label, nameAttr, srcAttr, typeAttr)
    if "audio/" in mimeType:
        # A single src pointing at the copied file; the media type goes in
        # the standard "type" attribute.
        return ("Audio: %s<BR><DIV ALIGN=\"center\">"
                "<audio controls preload=\"none\">"
                "<source id=\"%s\" src=\"%s\" type=\"%s\">"
                "</audio></DIV>") % (label, nameAttr, srcAttr, typeAttr)
    return ("<a href=\"%s\" target=\"_blank\" border=\"0\">"
            "<img id=\"%s\" src=\"%s\" mimetype=\"%s\" width=\"150\" align=\"top\">"
            "</a><br>") % (srcAttr, nameAttr, srcAttr, typeAttr)


def _attachment_removed_note(attachFilename, totalBytes):
    """Return the placeholder text used when an attachment is not exported."""
    # NOTE(review): the value comes from attachment.total_bytes while the
    # label says "KB"; wording kept as-is pending confirmation of the unit.
    attachDataSize = format(totalBytes or 0, ',d')
    return "<Attachment removed: " + attachFilename + " (Bytes: " + attachDataSize + " KB)>"


def _write_text_log(rows, chatTitle, chatEnd, groupChat, prettyID, chatPadding):
    """Print the plain-text transcript: starred banner, one line per message, footer."""
    stars = "*" * (len(chatTitle) + 4)
    print((stars + "\n* " + chatTitle + " *\n" + stars).encode("utf8"))

    for row in rows:
        # Skip this message if the text is blank (happens occasionally)
        if row[2] is None:
            continue
        who = _sender_label(row, groupChat, prettyID)
        text = row[2].strip()
        attachFilename = row[4] or ""
        if attachFilename:
            text = text + _attachment_removed_note(attachFilename, row[6])
        # Right-justify the sender so the columns line up.
        line = "%s @ %s: %s" % (who.rjust(chatPadding, ' '), row[0], text)
        print(line.encode("utf8"))

    stars = "*" * (len(chatEnd) + 4)
    print((stars + "\n* " + chatEnd + " *\n" + stars + "\n").encode("utf8"))


def _write_html_log(rows, chatTitle, chatEnd, groupChat, prettyID, chatPadding,
                    keepAttachment):
    """Print the HTML transcript, starting a new "bubble" div for each date."""
    printHTMLHeader()
    print("<div class=\"message date1\">")
    print(("<div align=\"center\"><b>" + cgi.escape(chatTitle) + "</b></div>").encode("utf8"))

    if keepAttachment:
        # Loop-invariant: create the target directory once, up front.
        _ensure_attachment_dir()

    prevDate = ""
    for row in rows:
        # Skip this message if the text is blank (happens occasionally)
        if row[2] is None:
            continue
        stamp = row[0]
        day = stamp[:11]
        clock = stamp[11:]
        text = row[2].strip()
        attachFilename = row[4] or ""
        who = cgi.escape(_sender_label(row, groupChat, prettyID).rjust(chatPadding, ' '))

        # Attachment columns are NULL for ordinary messages (LEFT JOIN), so
        # they are only touched when a filename is actually present.
        if not attachFilename:
            text = cgi.escape(text)
        elif keepAttachment:
            text = cgi.escape(text) + _attachment_html(attachFilename, row[7], row[8], row[5])
        else:
            text = cgi.escape(text + _attachment_removed_note(attachFilename, row[6]))

        if day == prevDate:
            # Same date as the previous message: just the user, time and text.
            line = "<b>%s @ %s:</b> %s " % (who, clock, text)
        else:
            # New date: close the previous bubble and open a new one.
            line = "</div><div class=\"message date\"><b>%s</b><hr><b>%s @ %s:</b> %s" % (day, who, clock, text)
        print(line.encode("utf8"))
        prevDate = day

    print(("</div><div class=\"message\" align=\"center\"><b>" + cgi.escape(chatEnd) + "</b></div>").encode("utf8"))


def exportID(chatFile, chat_id, prettyID, HTML, separate, keepAttachment):
    """Export one chat's messages as an HTML or plain-text transcript.

    Args:
        chatFile: Path of the Messages SQLite database to read.
        chat_id: The ``chat.chat_identifier`` value of the chat to export.
        prettyID: Display name used for the other party; when empty,
            ``chat_id`` is used instead.
        HTML: Truthy for HTML output, falsy for plain text.
        separate: Truthy to write to ``Export-<chat_id without '+'>`` plus
            ``.html``/``.txt`` in the working directory instead of stdout.
        keepAttachment: Truthy to copy attachments into ATTACHDIR and embed
            them (HTML output only).

    If the chat has no messages, a notice is printed and nothing is written.
    ``sys.stdout`` is restored, and the output file and database connection
    are closed, before returning, even on error.
    """
    db = sqlite3.connect(chatFile)
    db.row_factory = sqlite3.Row
    previous_stdout = sys.stdout
    log_file = None
    try:
        cursor = db.cursor()
        cursor2 = db.cursor()

        if not prettyID:
            prettyID = chat_id

        # First make sure data actually exists for this chat_id
        first_row = _boundary_date_row(cursor, chat_id, False)
        if first_row is None:
            print("No chat logs found for '%s'!" % (chat_id,))
            return
        firstDate = first_row[0]
        lastDate = _boundary_date_row(cursor, chat_id, True)[0]

        # Grab all the rows for this chat_id. They are consumed lazily, which
        # is why the handle lookup below runs on the second cursor.
        rows = cursor.execute("""
            SELECT datetime(m.date + ?, 'unixepoch', 'localtime') as fmtdate,
                m.is_from_me,
                m.text,
                h.id,
                a.filename,
                a.mime_type,
                a.total_bytes,
                a.guid,
                a.transfer_name,
                m.ROWID
            FROM chat as c
            INNER JOIN chat_message_join AS cm
                ON cm.chat_id = c.ROWID
            INNER JOIN message AS m
                ON m.ROWID = cm.message_id
            INNER JOIN handle as h
                ON h.ROWID = m.handle_ID
            LEFT JOIN message_attachment_join AS ma
                ON ma.message_id = m.ROWID
            LEFT JOIN attachment as a
                ON a.ROWID = ma.attachment_id
            WHERE c.chat_identifier = ?
            ORDER BY m.date;
        """, (EPOCH, chat_id))

        if separate:
            # The helpers (and printHTMLHeader) print to sys.stdout, so the
            # per-chat file is swapped in for the duration of the export.
            logExt = ".html" if HTML else ".txt"
            log_file = open("Export-" + chat_id.replace("+", "") + logExt, 'w')
            sys.stdout = log_file

        # See if this is a group chat instead of an individual conversation.
        # NOTE(review): the test is on prettyID, so a custom pretty ID turns
        # off group handling; confirm whether chat_id was intended.
        if prettyID[:4] == "chat":
            groupChat = 1
            handles = _group_handles(cursor2, chat_id)
            chat_handles = ", ".join(handles) if handles else "NOBODY"
            # Pad to the longest handle overall, not merely one longer than
            # its predecessor.
            chatPadding = max(len(handle) for handle in handles) if handles else 0
            chatTitle = "Group conversation with %s from %s to %s" % (chat_handles, firstDate, lastDate)
            chatEnd = "End of group conversation with %s" % (chat_handles)
        else:
            groupChat = 0
            chatTitle = "Chat transcript with %s from %s to %s" % (prettyID, firstDate, lastDate)
            chatEnd = "* End of chat transcript with %s *" % (prettyID)
            chatPadding = len(prettyID)

        if HTML:
            _write_html_log(rows, chatTitle, chatEnd, groupChat, prettyID,
                            chatPadding, keepAttachment)
        else:
            _write_text_log(rows, chatTitle, chatEnd, groupChat, prettyID,
                            chatPadding)
    finally:
        sys.stdout = previous_stdout
        if log_file is not None:
            log_file.close()
        db.close()
#
# exportAll strictly loops through all the chat conversations and calls
# exportID accordingly
#
def exportAll(chatFile, HTML, separate, keepAttachment):
    """Export every non-archived chat by calling exportID once per chat.

    Args:
        chatFile: Path of the Messages SQLite database to read.
        HTML: Passed through to exportID (truthy for HTML output).
        separate: Passed through to exportID (truthy for one file per chat).
        keepAttachment: Passed through to exportID.

    ``sys.stdout`` is put back to its value at entry before returning, even
    if one of the exports raises.
    """
    original_stdout = sys.stdout

    # Read the identifiers up front and release the connection; exportID
    # opens its own connection for each chat.
    db = sqlite3.connect(chatFile)
    try:
        cursor = db.cursor()
        chat_ids = [row[0] for row in cursor.execute("""
            SELECT DISTINCT chat_identifier
            FROM chat WHERE is_archived = 0 ORDER BY chat_identifier;
        """)]
    finally:
        db.close()

    try:
        for chat_id in chat_ids:
            exportID(chatFile, chat_id, "", HTML, separate, keepAttachment)
    finally:
        sys.stdout = original_stdout
def printHTMLHeader():
    """Print the charset <meta> tag and the inline stylesheet for HTML transcripts."""
    header_lines = (
        "",
        '<meta charset="utf-8">',
        "<style>",
        "body { margin: 0; padding: 0; }",
        ".message {",
        "white-space: pre-wrap;",
        "max-width: 800px;",
        "padding: 10px;",
        "margin: 10px;",
        'font-family: "Courier", Calibri, Tahoma;',
        "font-size: 14px;",
        "}",
        ".date { background-color: #EEE; }",
        ".date1 { background-color: #A6DBFF; }",
        "</style>",
        "",
    )
    # The empty first and last entries reproduce the leading and trailing
    # newline of the header text.
    print("\n".join(header_lines))
def usage():
    """Print the command-line synopsis and the option summary to stdout."""
    # The -k line names the directory exactly as the exporter creates it
    # ("export-Attachments"); the two differ on case-sensitive file systems.
    print("""
Usage:
%s [-hlastk] [--file <filename>] [--id <chat id>] [--prettyID <pretty ID>]
-h, --help Show this message
-f, --file Alternate SQLite DB to use
-l, --list List all non-archived chats
-a, --all Export all non-archived chats
-s, --separate During export, write chat to a separate file
-c, --id Specify the ID to export
-p, --prettyID Specify the pretty ID to use when exporting a single chat
-t Output chat in TXT format instead of HTML
-k Keep attachments (stored in ./export-Attachments)
""" % (__file__,))
def main():
    """Parse the command line and run the requested listing or export.

    Exits with status 2 on a bad or missing option, 1 when the database file
    does not exist, and 0 after showing help or listing chats.
    """
    chatFile = CHAT_DB
    chat_id = None
    prettyID = ""
    exportEverything = False
    HTML = 1
    separate = 0
    listchats = False
    keepAttachment = 0

    try:
        opts, _ = getopt.getopt(
            sys.argv[1:],
            "hf:altsc:p:k",
            ["help", "file=", "all", "list", "separate", "id=", "prettyID="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    # If no args are given, show usage
    if len(sys.argv) == 1:
        usage()
        sys.exit()

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-a", "--all"):
            exportEverything = True
        elif opt in ("-l", "--list"):
            listchats = True
        elif opt in ("-c", "--id"):
            chat_id = arg
        elif opt in ("-p", "--prettyID"):
            prettyID = arg
        elif opt == "-t":
            HTML = 0
        elif opt in ("-s", "--separate"):
            separate = 1
        elif opt in ("-f", "--file"):
            chatFile = arg
        elif opt == "-k":
            # Plain equality: `opt in ("-k")` would be a substring test
            # against the string "-k", not tuple membership.
            keepAttachment = 1
        else:
            # getopt only yields declared options, so this is a programming
            # error; raise explicitly so the check survives `python -O`.
            raise AssertionError("unhandled option")

    if not os.path.isfile(chatFile):
        print("'" + chatFile + "' not found!")
        sys.exit(1)

    if listchats:
        list_chats(chatFile)
        sys.exit()

    # Make sure a required argument was used
    if not exportEverything and chat_id is None:
        print("You need to specify either [-c] or [-a]")
        usage()
        sys.exit(2)

    if exportEverything:
        exportAll(chatFile, HTML, separate, keepAttachment)
    else:
        # exportID emits the HTML header itself (HTML mode only, and into the
        # per-chat file when -s is given), so nothing is printed here.
        exportID(chatFile, chat_id, prettyID, HTML, separate, keepAttachment)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment