Skip to content

Instantly share code, notes, and snippets.

@kahuang
Last active December 10, 2021 04:45
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save kahuang/e9771ed920e7afe78dc803265a0767c5 to your computer and use it in GitHub Desktop.
Save kahuang/e9771ed920e7afe78dc803265a0767c5 to your computer and use it in GitHub Desktop.
import codecs
import collections
import getopt
import json
import os
import re
import sys
import zipfile
# Converts a Quill export archive into rows for Slack's "CSV/Text File" importer.
# Every Quill thread becomes its own Slack channel named "<channel>-<thread title>".
# This section builds `lines`, the list of CSV rows written out further below.

# Slack channel names are capped at this many characters.
MAX_CHANNEL_NAME_LENGTH = 80


def _escapeCsvField(value):
    """Backslash-escape double quotes so `value` can sit inside a quoted CSV field.

    A quote that was already preceded by a backslash ends up with that
    backslash escaped as well.
    """
    return value.replace('"', '\\"').replace("\\\\\"", "\\\\\\\"")


try:
    opts, unparsedArgs = getopt.getopt(sys.argv[1:], "f:")
except getopt.GetoptError as err:
    # An unknown flag or a missing value is a usage error, not a crash.
    print(str(err))
    print("The `-f exportFile.zip` argument is required")
    sys.exit(2)
zipFileName = None
for o, a in opts:
    if o == "-f":
        zipFileName = a
if zipFileName is None:
    print("The `-f exportFile.zip` argument is required")
    sys.exit(2)

zf = zipfile.ZipFile(zipFileName)

# User id -> name shown in the CSV (display name first, then account name).
userIDToUsername = {}
with zf.open("users.json") as f:
    jsonUsers = json.loads(f.read().decode("utf-8"))["users"]
for jsonUser in jsonUsers:
    userIDToUsername[jsonUser["user"]] = jsonUser.get("displayName") or jsonUser.get("name") or "NoNameUser"

# Channel id -> parent channel id ("" when absent) and channel id -> channel name.
channelIDToParentID = {}
channelNameByID = {}
with zf.open("channels.json") as f:
    jsonChannels = json.loads(f.read().decode("utf-8"))["channels"]
for jsonChannel in jsonChannels:
    channelNameByID[jsonChannel["channel"]] = jsonChannel["name"]
    channelIDToParentID[jsonChannel["channel"]] = jsonChannel.get("parent") or ""

files = zf.namelist()
lines = []
# Generated Slack channel name -> thread id that owns it, used to keep names unique.
threadChannelNameToThreadID = {}
# Message files are named "<folder and thread id>-thread-messages-<n>.json".
threadMessagesPattern = re.compile(r"(.*)-thread-messages-[0-9]*\.json")

for file in files:
    matched = threadMessagesPattern.match(file)
    if not matched:
        continue
    folderAndThreadID = matched.group(1)
    with zf.open(folderAndThreadID + "-thread.json") as f:
        thread = json.loads(f.read().decode("utf-8"))
    threadTitle = thread.get("title") or "UnnamedThread"
    with zf.open(file) as f:
        jsonMessages = json.loads(f.read().decode("utf-8"))["messages"]
    # We want to process these in ascending-chron order to match what the CSV expects.
    # A deque gives O(1) pops from the front and cheap re-queuing of quoted messages.
    pendingMessages = collections.deque(reversed(jsonMessages))
    while pendingMessages:
        jsonMessage = pendingMessages.popleft()
        unixSeconds = jsonMessage["created"] / 1000
        username = userIDToUsername.get(jsonMessage["user"]) or "UnknownUser"
        channelName = channelNameByID.get(jsonMessage["channel"]) or "UnknownChannel"
        channelName = channelName + "-" + threadTitle
        # Make it look Slack-y, and remove any quotes so the csv is well-formatted.
        channelName = channelName.replace(" ", "-").replace("\"", "").lower()
        channelName = channelName[:MAX_CHANNEL_NAME_LENGTH]

        # Two threads can produce the same name; append "-1", "-2", ... (still
        # within the length cap) until the name is free or already ours.
        workingChannelName = channelName
        iteration = 1
        while (workingChannelName in threadChannelNameToThreadID) and (threadChannelNameToThreadID[workingChannelName] != jsonMessage["thread"]):
            iterationStr = "-" + str(iteration)
            workingChannelName = channelName[:MAX_CHANNEL_NAME_LENGTH - len(iterationStr)] + iterationStr
            iteration = iteration + 1
        channelName = workingChannelName
        threadChannelNameToThreadID[channelName] = jsonMessage["thread"]

        # Pick the text for this row based on which payload the message carries.
        messageStr = "UNSUPPORTED"
        textBody = jsonMessage.get("text")
        linkBody = jsonMessage.get("link")
        fileBody = jsonMessage.get("file")
        quoteBody = jsonMessage.get("quote")
        integrationMessageBody = jsonMessage.get("integration_message")
        if jsonMessage.get("deleted"):
            messageStr = "This message was deleted."
        elif jsonMessage.get("moved"):
            # Moved messages produce no row at all.
            continue
        elif textBody:
            messageStr = textBody["body"]
        elif linkBody:
            messageStr = linkBody["url"]
        elif fileBody:
            messageStr = fileBody["previewName"] + ": " + fileBody["link"]
        elif quoteBody:
            quotedMessages = quoteBody["quoted"]
            messageStr = "Quoted " + str(len(quotedMessages)) + " messages:"
            for quotedMessage in quotedMessages:
                quotedMessage["created"] = jsonMessage["created"]  # Override the quoted message's timestamp so it sorts correctly
            # Quoted messages are already in ascending chron order, so queue them at the
            # front (extendleft reverses, hence reversed()) and they'll get popped next.
            pendingMessages.extendleft(reversed(quotedMessages))
        elif integrationMessageBody:
            messageStr = integrationMessageBody["encoded"]

        # Usernames are free text too, so they get the same quote escaping as the message.
        lines.append("\"" + str(unixSeconds) + "\",\"" + channelName + "\",\"" + _escapeCsvField(username) + "\",\"" + _escapeCsvField(messageStr) + "\"")
def lineSortKey(line):
    """Return the sort key for one CSV row: its first 12 characters.

    Rows start with a quoted Unix timestamp, so the prefix is expected to
    cover the opening quote plus the whole-second part of the timestamp
    (assumes ten-digit second counts). Rows that share a key keep their
    original relative order because ``sorted`` is stable.

    Args:
        line: A CSV row string.

    Returns:
        The first 12 characters of ``line`` (fewer if the row is shorter).
    """
    return line[:12]
if __name__ == '__main__':
    # Put the rows in chronological order. Sorting is best effort: on failure the
    # rows are still written in the order they were produced, but the reason is reported.
    try:
        lines = sorted(lines, key=lineSortKey)
    except Exception as e:
        print("Sorting exception: " + str(e))

    # Swap only the file extension for ".csv". A blanket replace of "zip" would also
    # rewrite directory names, and would overwrite the archive itself when the path
    # contains no "zip" at all.
    root, ext = os.path.splitext(zipFileName)
    if ext.lower() == ".zip":
        outFileName = root + ".csv"
    else:
        outFileName = zipFileName + ".csv"

    # The context manager closes the file even if a write fails.
    with codecs.open(outFileName, 'w', encoding='utf-8') as outFile:
        outFile.writelines(message + "\n" for message in lines)

    print("Done transforming, results saved in " + outFileName)
    print("How to import this file into Slack:")
    print("- Go to Settings & Administration -> Workspace Settings")
    print("- At the top right of the screen there's an \"Import/Export Data\" button")
    print("- Choose the \"CSV/Text File\" option")
    print("- Chose this generated file as the \"Your CSV file\" and keep the default delimiter, click \"Start Import\"")
    print("\n")
    print("This import may take several days if you have a large Quill team with many threads/messages.")
    print("\n")
    print("Due to limitations of the Slack import tool, every Quill thread is turned into a separate Slack channel. You may want to prune your Quill workspace before exporting (or manually remove lines from the CSV) to limit how many channels get created. The user that does the import will be automatically subscribed to all of the imported channels, which makes the Slack app unusable if you have a very large number of channels. We recommend creating a dummy account to do the import.")
@colmanhumphrey
Copy link

I think the function should be:

def lineSortKey(line):
  return line[:12]

if I'm not mistaken

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment