Skip to content

Instantly share code, notes, and snippets.

@hastyeagle
Forked from nomicode/export_chat.py
Last active July 17, 2022 15:43
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save hastyeagle/9a01f549535a167071cfbc0e61d57927 to your computer and use it in GitHub Desktop.
Save hastyeagle/9a01f549535a167071cfbc0e61d57927 to your computer and use it in GitHub Desktop.
Export iOS/iMessage chat logs to HTML or text
#!/usr/bin/env python
import sys
import argparse
import urllib
import urlparse
import base64
import mimetypes
import cgi
import sqlite3
import os
import errno
import plistlib
from os import path
from shutil import copy2
# TODO:
# - Add auto-linking
# - Export video media
# - Match chat IDs up to names using Contacts.app SQLite db
# Default location of the live Messages database; used unless -f or a backup
# is selected on the command line.
CHAT_DB = path.expanduser("~/Library/Messages/chat.db")
# Directory whose sub-directories are listed as device backups by
# list_backups(); a backup ID is the name of one of those sub-directories.
CHAT_DB_BACKUP_PATH = path.expanduser("~/Library/Application Support/MobileSync/Backup")
# File name of the chat database inside a backup; main() looks for it under
# "<backup>/3d/".
# NOTE(review): presumably the hashed name iOS backups give the SMS/iMessage
# database -- confirm before relying on it for other iOS versions.
CHAT_DB_BACKUP = "3d0d7e5fb2ce288813306e4d4636395e047a3d28"
# Directory (relative to the current working directory) that attachments are
# copied into when exporting HTML with -k.
ATTACHDIR = "export-Attachments"
# Apple's epoch starts on January 1st, 2001 for some reason...
# cf. http://apple.stackexchange.com/questions/114168
# Offset in seconds that the SQL queries add to message.date (after dividing
# it by 1000000000) so SQLite's 'unixepoch' modifier can format it.
EPOCH = 978307200
def list_backups():
    """Print the backups found under CHAT_DB_BACKUP_PATH.

    One line is printed per sub-directory: the directory name (the value to
    pass to -b) followed by the "Device Name" stored in that backup's
    Info.plist.  "missing" is shown when the plist has no such key or cannot
    be read, so a single damaged backup no longer aborts the listing.
    """
    print("Below is a list of possible backups to choose from:")
    try:
        # Sorted so the listing is stable between runs.
        entries = sorted(os.listdir(CHAT_DB_BACKUP_PATH))
    except OSError:
        print("Error accessing '" + CHAT_DB_BACKUP_PATH + "'!")
        return
    for entry in entries:
        backupDir = os.path.join(CHAT_DB_BACKUP_PATH, entry)
        if not os.path.isdir(backupDir):
            continue
        # Get the device name.  Reading the plist is best-effort: a missing or
        # unparsable Info.plist only affects the label of this one backup.
        try:
            info = plistlib.readPlist(os.path.join(backupDir, "Info.plist"))
            devName = unicode(info.get("Device Name", "missing"))
        except Exception:
            devName = u"missing"
        # Encode explicitly (as the export code does) so non-ASCII device
        # names do not fail when stdout is a pipe or a file.
        print((" " + entry + " (" + devName + ")").encode("utf8"))
def list_chats(chatFile):
    """Print every non-archived chat identifier with its message statistics.

    For each distinct chat_identifier of a non-archived chat the number of
    joined messages and the local dates of the oldest and newest message are
    printed, most recently active chat first.  Identifiers without messages
    are skipped.

    chatFile -- path of the SQLite chat database to read.

    Exits the process with status 1 if the database cannot be opened.
    """
    try:
        db = sqlite3.connect(chatFile)
    except sqlite3.Error:
        print("Error accessing '" + chatFile + "'!")
        sys.exit(1)
    try:
        cursor = db.cursor()
        print("Below is a list of IDs and their associated message counts (most recent first):")
        # Materialise the identifiers first so the cursor can be reused for
        # the per-chat statistics below.
        identifiers = [row[0] for row in cursor.execute("""
            SELECT DISTINCT chat_identifier
            FROM chat WHERE is_archived = 0 ORDER BY chat_identifier;
        """)]
        chatIDs = {}
        for chat_id in identifiers:
            # One aggregate query yields the count and both date bounds.  The
            # identifier is bound as a parameter, never formatted into the
            # SQL, so identifiers containing quotes cannot break the query.
            numRows, firstDate, lastDate = cursor.execute("""
                SELECT COUNT(*),
                       substr(datetime(MIN(m.date)/1000000000 + ?, 'unixepoch', 'localtime'), 0, 11),
                       substr(datetime(MAX(m.date)/1000000000 + ?, 'unixepoch', 'localtime'), 0, 11)
                FROM chat as c
                INNER JOIN chat_message_join AS cm
                    ON cm.chat_id = c.ROWID
                INNER JOIN message AS m
                    ON m.ROWID = cm.message_id
                WHERE c.chat_identifier = ?;
            """, (EPOCH, EPOCH, chat_id)).fetchone()
            if numRows == 0:
                continue
            chatIDs[chat_id] = [numRows, firstDate, lastDate]
    finally:
        db.close()
    # Sort by the date of the last message, newest first.
    for key, value in sorted(chatIDs.items(), key=lambda e: e[1][2], reverse=True):
        print(" " + key + " (" + str(value[0]) + " messages, " + str(value[1]) + " to " + str(value[2]) + ")")
def _export_date_range(cursor, chat_id):
    """Return (message count, first date, last date) for one chat identifier."""
    row = cursor.execute("""
        SELECT COUNT(*),
               substr(datetime(MIN(m.date)/1000000000 + ?, 'unixepoch', 'localtime'), 0, 11),
               substr(datetime(MAX(m.date)/1000000000 + ?, 'unixepoch', 'localtime'), 0, 11)
        FROM chat as c
        INNER JOIN chat_message_join AS cm
            ON cm.chat_id = c.ROWID
        INNER JOIN message AS m
            ON m.ROWID = cm.message_id
        WHERE c.chat_identifier = ?;
    """, (EPOCH, EPOCH, chat_id)).fetchone()
    return row[0], row[1], row[2]


def _export_group_handles(cursor, chat_id):
    """Return the distinct handle ids that appear on the chat's messages, sorted."""
    rows = cursor.execute("""
        SELECT DISTINCT h.id
        FROM chat as c
        INNER JOIN chat_message_join AS cm
            ON cm.chat_id = c.ROWID
        INNER JOIN message AS m
            ON m.ROWID = cm.message_id
        INNER JOIN handle as h
            ON h.ROWID = m.handle_id
        WHERE c.chat_identifier = ?
        ORDER BY h.id;
    """, (chat_id,))
    return [row[0] for row in rows]


def _export_sender(fromMe, handle, groupChat, prettyID):
    """Return the label printed in front of a message: "me", a handle or prettyID."""
    if fromMe == 1:
        return "me"
    if groupChat:
        # The handle comes from a LEFT JOIN and may therefore be NULL.
        return handle if handle is not None else "unknown"
    return prettyID


def _export_size(totalBytes):
    """Format an attachment size with thousands separators (NULL counts as 0)."""
    return format(totalBytes or 0, ',d')


def _export_attachment_html(attachFilename, mimeType, attachGUID, attachName):
    """Copy one attachment into ATTACHDIR and return the HTML that embeds it."""
    # Create the attachment dir if it doesn't exist
    try:
        os.makedirs(ATTACHDIR)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise
    attachment = path.expanduser(attachFilename)
    if not os.path.exists(attachment):
        return cgi.escape("<Attachment not found: " + attachFilename + ">")
    # transfer_name and guid may be NULL; fall back to values that still
    # produce a usable file name.
    attachName = attachName or path.basename(attachment)
    newFile = ATTACHDIR + "/" + (attachGUID or "") + "-" + attachName
    newFile = newFile.replace(" ", "_")
    copy2(attachment, newFile)
    # Quote-escape everything placed inside an HTML attribute.
    src = cgi.escape(newFile, True)
    if "video/" in (mimeType or ""):
        return "Video: " + cgi.escape(attachName) + "<BR><DIV ALIGN=\"center\"><video id=\"" + cgi.escape(attachName, True) + "\" src=\"" + src + "\" width=\"800\" height=\"540\" controls></video></DIV>"
    return "<a href=\"" + src + "\" target=\"_blank\" border=\"0\"><img src=\"" + src + "\" width=\"150\" align=\"top\"></a><br>"


def _export_text(rows, chatTitle, chatEnd, chatPadding, groupChat, prettyID):
    """Print the transcript as plain text framed by starred title/end banners."""
    stars = "*" * (len(chatTitle) + 4)
    print((stars + "\n* " + chatTitle + " *\n" + stars).encode("utf8"))
    for row in rows:
        (date, fromMe, body, handle, attachFilename,
         mimeType, totalBytes, attachGUID, attachName) = tuple(row)[:9]
        # Skip this message if the text is blank (happens occasionally)
        if body is None:
            continue
        # The sender is right-justified so "me" lines up with longer names.
        who = _export_sender(fromMe, handle, groupChat, prettyID)
        text = body.strip()
        if attachFilename:
            text = text + "<Attachment removed: " + attachFilename + " (Bytes: " + _export_size(totalBytes) + " KB)>"
        line = "%s @ %s: %s" % (who.rjust(chatPadding, ' '), date, text)
        print(line.encode("utf8"))
    stars = "*" * (len(chatEnd) + 4)
    print((stars + "\n* " + chatEnd + " *\n" + stars + "\n").encode("utf8"))


def _export_html(rows, chatTitle, chatEnd, chatPadding, groupChat, prettyID, keepAttachment):
    """Print the transcript as HTML, starting a new block for every new date."""
    printHTMLHeader()
    print("<div class=\"message date1\">")
    print(("<div align=\"center\"><b>" + cgi.escape(chatTitle) + "</b></div>").encode("utf8"))
    prevDate = ""
    for row in rows:
        (stamp, fromMe, body, handle, attachFilename,
         mimeType, totalBytes, attachGUID, attachName) = tuple(row)[:9]
        # Skip this message if the text is blank (happens occasionally)
        if body is None:
            continue
        # "YYYY-MM-DD HH:MM:SS": the first 11 characters are the date plus
        # the separating blank, the remainder is the time.
        stamp = stamp or ""
        date = stamp[:11]
        time = stamp[11:]
        who = cgi.escape(_export_sender(fromMe, handle, groupChat, prettyID).rjust(chatPadding, ' '))
        text = body.strip()
        if not attachFilename:
            text = cgi.escape(text)
        elif keepAttachment:
            text = cgi.escape(text) + _export_attachment_html(attachFilename, mimeType, attachGUID, attachName)
        else:
            # Not keeping attachments: leave an (escaped) placeholder behind.
            text = cgi.escape(text + "<Attachment removed: " + attachFilename + " (Bytes: " + _export_size(totalBytes) + " KB)>")
        if date == prevDate:
            # Same date as the previous message: just the user, time and text
            line = "<b>%s @ %s:</b> %s " % (who, time, text)
        else:
            # This is a new date, start a new bubble
            line = "</div><div class=\"message date\"><b>%s</b><hr><b>%s @ %s:</b> %s" % (date, who, time, text)
        print(line.encode("utf8"))
        prevDate = date
    # End of the current chat transcript
    print(("</div><div class=\"message\" align=\"center\"><b>" + cgi.escape(chatEnd) + "</b></div>").encode("utf8"))


def exportID(chatFile, chat_id, prettyID, HTML, separate, keepAttachment):
    """Export the transcript of one chat as HTML or plain text.

    chatFile       -- path of the SQLite chat database.
    chat_id        -- chat_identifier of the conversation to export.
    prettyID       -- name printed for the other party; chat_id is used when
                      it is empty.
    HTML           -- true for HTML output, false for plain text.
    separate       -- true to write to "Export-<chat_id>.html/.txt" in the
                      current directory instead of the current sys.stdout.
    keepAttachment -- HTML only: true to copy attachments into ATTACHDIR and
                      embed them, false to print a placeholder instead.

    Prints a notice and returns without output when the chat has no messages.
    Exits the process with status 1 if the database cannot be opened.  When
    `separate` is set, sys.stdout is restored and the file closed on return.
    """
    try:
        db = sqlite3.connect(chatFile)
    except sqlite3.Error:
        print("Error accessing '" + chatFile + "'!")
        sys.exit(1)
    oldstdout = sys.stdout
    outFile = None
    try:
        db.row_factory = sqlite3.Row
        if not prettyID:
            prettyID = chat_id
        # A name given on the command line is a byte string under Python 2;
        # decode it so it can be mixed with the unicode text from SQLite.
        if isinstance(prettyID, bytes):
            prettyID = prettyID.decode("utf8", "replace")

        # First make sure data actually exists for this chat_id
        numRows, firstDate, lastDate = _export_date_range(db.cursor(), chat_id)
        if numRows == 0:
            print("No chat logs found for '%s'!" % (chat_id))
            return

        # See if this is a group chat instead of an individual conversation.
        # NOTE(review): the test looks at prettyID, so a custom pretty ID on a
        # group chat (or one starting with "chat") changes the outcome; kept
        # as in the original -- confirm whether chat_id was intended.
        if prettyID[:4] == "chat":
            groupChat = 1
            handles = _export_group_handles(db.cursor(), chat_id)
            chat_handles = ", ".join(handles) if handles else "NOBODY"
            # Pad sender names to the longest handle in the chat.
            chatPadding = max([len(handle) for handle in handles] or [0])
            chatTitle = "Group conversation with %s from %s to %s" % (chat_handles, firstDate, lastDate)
            chatEnd = "End of group conversation with %s" % (chat_handles)
        else:
            groupChat = 0
            chatTitle = "Chat transcript with %s from %s to %s" % (prettyID, firstDate, lastDate)
            chatEnd = "* End of chat transcript with %s *" % (prettyID)
            chatPadding = len(prettyID)

        # Grab all the rows for this chat_id
        rows = db.cursor().execute("""
            SELECT datetime(m.date/1000000000 + ?, 'unixepoch', 'localtime') as fmtdate,
                   m.is_from_me,
                   m.text,
                   h.id,
                   a.filename,
                   a.mime_type,
                   a.total_bytes,
                   a.guid,
                   a.transfer_name,
                   m.ROWID
            FROM chat as c
            INNER JOIN chat_message_join AS cm
                ON cm.chat_id = c.ROWID
            INNER JOIN message AS m
                ON m.ROWID = cm.message_id
            LEFT JOIN handle as h
                ON h.ROWID = m.handle_ID
            LEFT JOIN message_attachment_join AS ma
                ON ma.message_id = m.ROWID
            LEFT JOIN attachment as a
                ON a.ROWID = ma.attachment_id
            WHERE c.chat_identifier = ?
            ORDER BY m.date;
        """, (EPOCH, chat_id))

        if separate:
            # Output this chat to its own file
            logExt = ".html" if HTML else ".txt"
            outFile = open("Export-" + chat_id.replace("+", "") + logExt, 'w')
            sys.stdout = outFile

        if HTML:
            _export_html(rows, chatTitle, chatEnd, chatPadding, groupChat, prettyID, keepAttachment)
        else:
            _export_text(rows, chatTitle, chatEnd, chatPadding, groupChat, prettyID)
    finally:
        if outFile is not None:
            sys.stdout = oldstdout
            outFile.close()
        db.close()
def exportAll(chatFile, HTML, separate, keepAttachment):
    """Export every non-archived chat by calling exportID() once per chat.

    chatFile       -- path of the SQLite chat database.
    HTML           -- true for HTML output, false for plain text.
    separate       -- true to write each chat to its own file.
    keepAttachment -- passed through to exportID().

    Exits the process with status 1 if the database cannot be opened.
    """
    oldstdout = sys.stdout
    try:
        db = sqlite3.connect(chatFile)
    except sqlite3.Error:
        print("Error accessing '" + chatFile + "'!")
        sys.exit(1)
    # Read all identifiers up front so the connection can be closed before
    # exportID() opens its own.
    try:
        chat_ids = [row[0] for row in db.execute("""
            SELECT DISTINCT chat_identifier
            FROM chat WHERE is_archived = 0 ORDER BY chat_identifier;
        """)]
    finally:
        db.close()
    # Loop through each ID and export to a file/stdout
    for chat_id in chat_ids:
        try:
            exportID(chatFile, chat_id, "", HTML, separate, keepAttachment)
        finally:
            # exportID() may have pointed sys.stdout at a per-chat file; put
            # the original stream back and close that file so it is flushed
            # and its handle released before the next chat.
            redirected = sys.stdout
            sys.stdout = oldstdout
            if redirected is not oldstdout:
                redirected.close()
def printHTMLHeader():
    """Print the charset declaration and inline stylesheet that open an HTML export."""
    # The empty first and last entries reproduce the blank lines that
    # surround the markup in the emitted page.
    markup = [
        "",
        "<meta charset=\"utf-8\">",
        "<style>",
        "body { margin: 0; padding: 0; }",
        ".message {",
        "white-space: pre-wrap;",
        "max-width: 800px;",
        "padding: 10px;",
        "margin: 10px;",
        "font-family: \"Courier\", Calibri, Tahoma;",
        "font-size: 14px;",
        "}",
        ".date { background-color: #EEE; }",
        ".date1 { background-color: #A6DBFF; }",
        "</style>",
        "",
    ]
    print("\n".join(markup))
def _build_parser():
    """Create the command-line parser shared by main() and usage()."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--type", help="Specify the source. Either [m]essages (default), or [b]ackup. When specifying [b], you must also pass the -b option to specify which backup to use.")
    parser.add_argument("-b", "--backupID", nargs='?', help="Specifies which backup to use. If blank, a list of backups will be shown.")
    parser.add_argument("-f", "--file", help="Alternate SQLite DB to use")
    parser.add_argument("-l", "--list", action="store_true", help="List all non-archived chats")
    parser.add_argument("-s", "--separate", action="store_true", help="During export, write chat(s) to a separate file")
    parser.add_argument("-c", "--id", help="Specify the ID to export. Specify 'a' for all.")
    parser.add_argument("-p", "--prettyID", help="Specify the pretty ID to use when exporting a single chat")
    parser.add_argument("-t", action="store_true", help="Output chat in TXT format instead of HTML")
    # Name the directory the export code really writes to.
    parser.add_argument("-k", action="store_true", help="Keep attachments (stored in ./" + ATTACHDIR + ")")
    return parser


def usage():
    """Print the command-line help."""
    _build_parser().print_help()


def main():
    """Parse the command line and list backups, list chats or export chats.

    Exit status is 0 after a successful listing or export and 1 when the
    arguments are invalid or a required file or directory is missing.
    """
    parser = _build_parser()
    args = parser.parse_args()

    exportType = args.type or "m"
    backupID = args.backupID or ""
    chatFile = args.file or CHAT_DB
    prettyID = args.prettyID or ""
    HTML = 0 if args.t else 1
    separate = 1 if args.separate else 0
    keepAttachment = 1 if args.k else 0

    if exportType != "b" and exportType != "m":
        print("The export type must be either [b]ackups or [m]essages.")
        print("")
        parser.print_help()
        sys.exit(1)

    # Set chatFile accordingly if we're using a backup instead of the (default)
    # Messages DB. If so, see if an argument was passed for -b, and if not print
    # out the current backups available.
    if exportType == "b":
        if not backupID:
            # Print out a list of backups available.
            list_backups()
            sys.exit()
        # Verify the backup directory exists.
        backupDir = os.path.join(CHAT_DB_BACKUP_PATH, backupID)
        if not os.path.isdir(backupDir):
            print("The backup directory '" + backupDir + "' was not found!")
            sys.exit(1)
        chatFile = os.path.join(backupDir, "3d", CHAT_DB_BACKUP)

    # Check the database only now that its final location is known, so a
    # missing live Messages DB no longer blocks working from a backup.
    if not os.path.isfile(chatFile):
        print("'" + chatFile + "' not found!")
        sys.exit(1)

    # See if we want to list all chats.
    if args.list:
        list_chats(chatFile)
        sys.exit()

    # Make sure a required argument was used.
    if not args.id:
        print("You need to specify [-c].")
        print("")
        parser.print_help()
        sys.exit(1)

    # See if we're exporting all, or just one conversation.
    if args.id == "a":
        exportAll(chatFile, HTML, separate, keepAttachment)
    else:
        exportID(chatFile, args.id, prettyID, HTML, separate, keepAttachment)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment