Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save colmanhumphrey/d24a295e2fbc37c56b3a3cdf2ae0bd5f to your computer and use it in GitHub Desktop.
Save colmanhumphrey/d24a295e2fbc37c56b3a3cdf2ae0bd5f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import zipfile
import getopt
import sys
import re
import json
import codecs
# Parse the command line. The only recognised option is `-f <exportFile.zip>`;
# any positional leftovers end up in `unparsedArgs`.
try:
    opts, unparsedArgs = getopt.getopt(sys.argv[1:], "f:")
except getopt.GetoptError as err:
    # Unknown option, or `-f` given without a value: report it the same way as
    # a missing `-f` instead of dying with a traceback.
    print(str(err))
    print("The `-f exportFile.zip` argument is required")
    sys.exit(2)

zipFileName = None
for o, a in opts:
    if o == "-f":
        zipFileName = a

if zipFileName is None:
    print("The `-f exportFile.zip` argument is required")
    sys.exit(2)
# Open the export archive named on the command line; the rest of the script
# reads its members through `zf`.
zf = zipfile.ZipFile(zipFileName)

# users.json -> {user ID: label}. The label is the user's displayName, falling
# back to name, and finally to a fixed placeholder when both are missing/empty.
with zf.open("users.json") as usersFile:
    jsonUsers = json.loads(usersFile.read().decode("utf-8"))["users"]
userIDToUsername = {
    jsonUser["user"]: jsonUser.get("displayName") or jsonUser.get("name") or "NoNameUser"
    for jsonUser in jsonUsers
}
# channels.json -> two lookups keyed by channel ID: the channel's name, and its
# parent (empty string when the channel has no parent entry).
with zf.open("channels.json") as channelsFile:
    jsonChannels = json.loads(channelsFile.read().decode("utf-8"))["channels"]
channelNameByID = {
    jsonChannel["channel"]: jsonChannel["name"] for jsonChannel in jsonChannels
}
channelIDToParentID = {
    jsonChannel["channel"]: jsonChannel.get("parent") or "" for jsonChannel in jsonChannels
}
# Archive members holding a thread's messages. Group 1 is the prefix shared
# with the thread's own "<prefix>-thread.json" metadata file. The dot is
# escaped and the whole name must match, so look-alikes such as
# "...-thread-messages-0.json.bak" are not picked up.
threadMessagesFilePattern = re.compile(r"(.*)-thread-messages-[0-9]*\.json")


def _csvRow(unixSeconds, channelName, username, text):
    """Return one CSV row: "timestamp","channel","user","text".

    Every field is wrapped in double quotes as-is; callers are responsible for
    having removed or escaped any double quotes inside the fields.
    """
    return "\"" + "\",\"".join([str(unixSeconds), channelName, username, text]) + "\""


def _threadRows(threadTitle, jsonMessages, usernamesByID, channelNamesByID):
    """Convert one thread's messages into CSV rows, oldest message first.

    Args:
        threadTitle: title of the thread, used for the "new thread" header row.
        jsonMessages: message dicts as stored in the export. They are consumed
            from the END of the list, i.e. the list is expected to be
            newest-first (the ordering the export appears to use).
        usernamesByID: user ID -> display label.
        channelNamesByID: channel ID -> channel name.

    Returns:
        A list of CSV row strings. The first emitted message is preceded by a
        header row from the pseudo-user `new-quill-thread`, stamped one second
        earlier so it sorts just before that message.

    Raises:
        KeyError: if a message lacks "created", "user" or "channel", or a body
            lacks the field read from it.
    """
    rows = []
    # Quotes would break the CSV quoting, newlines would spoil the flat
    # one-line look, and 120 characters keeps titles from getting excessive.
    flatTitle = threadTitle.replace("\"", "").replace("\n", " ")[:120]
    headerPending = True

    # Used as a stack: popping from the end yields ascending chronological
    # order without the O(n) cost of list.pop(0). Copy so the caller's list is
    # left alone.
    pending = list(jsonMessages)
    while pending:
        jsonMessage = pending.pop()
        unixSeconds = round(jsonMessage["created"] / 1000)  # "created" is in milliseconds
        username = usernamesByID.get(jsonMessage["user"]) or "UnknownUser"
        channelName = channelNamesByID.get(jsonMessage["channel"]) or "UnknownChannel"

        # Make it look Slack-y (dashes, lower case), drop quotes so the CSV
        # stays well-formed, and respect Slack's 80-character channel limit.
        channelName = channelName.replace(" ", "-").lower().replace("\"", "")[:80]
        if channelName == "unknownchannel":
            continue
        # Same reason as for channel names: a quote here would end the field early.
        username = username.replace("\"", "")

        if headerPending:
            rows.append(_csvRow(unixSeconds - 1, channelName, "new-quill-thread",
                                "_New thread_\n*" + flatTitle + "*"))
            headerPending = False

        if jsonMessage.get("deleted") or jsonMessage.get("moved"):
            continue

        textBody = jsonMessage.get("text")
        linkBody = jsonMessage.get("link")
        fileBody = jsonMessage.get("file")
        quoteBody = jsonMessage.get("quote")
        integrationMessageBody = jsonMessage.get("integration_message")

        messageStr = "UNSUPPORTED"
        if textBody:
            messageStr = textBody["body"]
        elif linkBody:
            messageStr = linkBody["url"]
        elif fileBody:
            messageStr = fileBody["previewName"] + ": " + fileBody["link"]
        elif quoteBody:
            quotedMessages = quoteBody["quoted"]
            messageStr = "Quoted " + str(len(quotedMessages)) + " messages:"
            for quotedMessage in quotedMessages:
                # Override the quoted message's timestamp so it sorts right
                # after the message that quotes it.
                quotedMessage["created"] = jsonMessage["created"]
            # Quoted messages are already in ascending order; push them in
            # reverse so the first quoted message is the next one popped.
            pending.extend(reversed(quotedMessages))
        elif integrationMessageBody:
            messageStr = integrationMessageBody["encoded"]

        # Backslash-escape double quotes, then fix up quotes that were already
        # preceded by a backslash in the source text.
        messageStr = messageStr.replace('"', '\\"').replace("\\\\\"", "\\\\\\\"")
        rows.append(_csvRow(unixSeconds, channelName, username, messageStr))
    return rows


files = zf.namelist()
lines = []
threadChannelNameToThreadID = {}  # not read anywhere in the visible script; kept as-is
for memberName in files:
    matched = threadMessagesFilePattern.fullmatch(memberName)
    if not matched:
        continue
    folderAndThreadID = matched.group(1)
    with zf.open(folderAndThreadID + "-thread.json") as f:
        thread = json.loads(f.read().decode("utf-8"))
    threadTitle = thread.get("title") or "UnnamedThread"
    with zf.open(memberName) as f:
        jsonMessages = json.loads(f.read().decode("utf-8"))["messages"]
    lines.extend(_threadRows(threadTitle, jsonMessages, userIDToUsername, channelNameByID))

# Nothing below reads from the archive any more, so release the file handle.
zf.close()
def lineSortKey(line):
    """Return the Unix timestamp of a generated CSV row, for use as a sort key.

    Rows built by this script start with the timestamp in double quotes
    (`"1650000000","channel",...`). The value between the first pair of quotes
    is parsed as an integer so rows compare numerically; comparing a
    fixed-width text slice only works while every timestamp has the same
    number of digits. Rows with equal timestamps keep their original relative
    order because Python's sort is stable.

    Args:
        line: one CSV row as produced by this script.

    Returns:
        The row's timestamp as an int.

    Raises:
        ValueError: if the row has no closing quote after the first character
            or the quoted value is not an integer.
    """
    closingQuote = line.index('"', 1)
    return int(line[1:closingQuote])
def csvFileNameFor(zipPath):
    """Return the path of the CSV file to write for the archive at `zipPath`.

    A trailing ".zip" (any letter case) is swapped for ".csv"; any other path
    simply gets ".csv" appended. Only the extension is touched, so "zip"
    occurring elsewhere in the path is left alone, and the result is never
    equal to `zipPath` (which would overwrite the input archive).
    """
    suffix = ".zip"
    if zipPath.lower().endswith(suffix):
        return zipPath[:-len(suffix)] + ".csv"
    return zipPath + ".csv"


if __name__ == '__main__':
    # Order rows by timestamp. The sort is stable, so rows sharing a timestamp
    # (e.g. quoted messages) stay in the order they were generated.
    try:
        lines = sorted(lines, key=lineSortKey)
    except Exception as e:
        # Best effort: still write the rows, just in the order they were
        # generated, but say what went wrong.
        print("Sorting exception: " + str(e))

    outFileName = csvFileNameFor(zipFileName)
    # `with` guarantees the file is flushed and closed even if a write fails.
    with codecs.open(outFileName, 'w', encoding='utf-8') as outFile:
        for message in lines:
            outFile.write(message + "\n")

    print("Done transforming, results saved in " + outFileName)
    print("How to import this file into Slack:")
    print("- Go to Settings & Administration -> Workspace Settings")
    print("- At the top right of the screen there's an \"Import/Export Data\" button")
    print("- Choose the \"CSV/Text File\" option")
    print("- Chose this generated file as the \"Your CSV file\" and keep the default delimiter, click \"Start Import\"")
    print("\n")
    print("This import may take several days if you have a large Quill team with many threads/messages.")
    print("\n")
    print("This version puts all threads within their channel, with a new user `new-quill-thread` posting the thread name prior to the thread starting.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment