# kahuang/export-transform-to-slack-csv.py

## export-transform-to-slack-csv.py
import zipfile
import getopt
import sys
import re
import json
import codecs

# Command line handling: the only supported option is `-f <export zip>`.
try:
  opts, unparsedArgs = getopt.getopt(sys.argv[1:], "f:")
except getopt.GetoptError as err:
  # Unknown option, or `-f` given without a value: report it and exit with
  # the usage status instead of dumping a traceback.
  print(str(err))
  print("The `-f exportFile.zip` argument is required")
  sys.exit(2)

# Path of the export archive. If `-f` is repeated, the last value wins.
zipFileName = None
for o, a in opts:
  if o == "-f":
    zipFileName = a

# Truthiness check so that both a missing `-f` and an empty `-f ""` are rejected.
if not zipFileName:
  print("The `-f exportFile.zip` argument is required")
  sys.exit(2)

# The archive is kept open: the steps below and the message loop read members from it.
zf = zipfile.ZipFile(zipFileName)


def _loadJsonMember(memberName):
  # Read one archive member, decode it as UTF-8 and parse it as JSON.
  with zf.open(memberName) as member:
    return json.loads(member.read().decode("utf-8"))


# user id -> name written to the CSV: display name, else account name, else a placeholder.
userIDToUsername = {}
for userRecord in _loadJsonMember("users.json")["users"]:
  shownName = userRecord.get("displayName")
  if not shownName:
    shownName = userRecord.get("name")
  if not shownName:
    shownName = "NoNameUser"
  userIDToUsername[userRecord["user"]] = shownName

# channel id -> parent channel id ("" when the record has no parent), and channel id -> name.
channelIDToParentID = {}
channelNameByID = {}
for channelRecord in _loadJsonMember("channels.json")["channels"]:
  channelTitle = channelRecord["name"]
  channelNameByID[channelRecord["channel"]] = channelTitle
  parentID = channelRecord.get("parent")
  if not parentID:
    parentID = ""
  channelIDToParentID[channelRecord["channel"]] = parentID

# Walk every "<prefix>-thread-messages-<n>.json" member of the archive and turn
# each message into one CSV row: "timestamp","channel","username","text".
files = zf.namelist()
lines = []
# Generated Slack channel name -> id of the Quill thread that owns that name.
threadChannelNameToThreadID = {}

# Raw string, escaped dot and end anchor so that only members really ending in
# "-thread-messages-<digits>.json" are picked up.
threadMessagesPattern = re.compile(r"(.*)-thread-messages-[0-9]*\.json$")


def _escapeCsvQuotes(text):
  # Backslash-escape double quotes. A quote that already had a backslash in
  # front of it ends up as three backslashes followed by the quote.
  return text.replace('"', '\\"').replace("\\\\\"", "\\\\\\\"")


for file in files:
  matched = threadMessagesPattern.match(file)
  if not matched:
    continue
  folderAndThreadID = matched.group(1)

  # Thread title comes from the sibling "<prefix>-thread.json" member.
  threadTitle = "UnknownThread"
  try:
    threadMember = zf.open(folderAndThreadID + "-thread.json")
  except KeyError:
    # ZipFile.open raises KeyError when the member is absent; keep the placeholder title.
    threadMember = None
  if threadMember is not None:
    with threadMember as f:
      thread = json.loads(f.read().decode("utf-8"))
    threadTitle = thread.get("title") or "UnnamedThread"

  with zf.open(file) as f:
    jsonMessages = json.loads(f.read().decode("utf-8"))["messages"]

  # We want to process these in ascending-chron order to match what the CSV expects.
  # The list is used as a stack: taking items from the end yields the same order
  # as reversing it and popping from the front, without shifting the list each time.
  while jsonMessages:
    jsonMessage = jsonMessages.pop()
    unixSeconds = jsonMessage["created"] / 1000
    username = userIDToUsername.get(jsonMessage["user"]) or "UnknownUser"

    channelName = channelNameByID.get(jsonMessage["channel"]) or "UnknownChannel"
    channelName = channelName + "-" + threadTitle
    channelName = channelName.replace(" ", "-").lower() # Make it look Slack-y
    channelName = channelName.replace("\"", "") # Remove any quotes so the csv is well-formatted
    channelName = channelName[:80] # Max channel name in Slack is 80 chars

    # If another thread already claimed this name, append "-1", "-2", ... (still
    # within 80 chars) until the name is free or already belongs to this thread.
    workingChannelName = channelName
    iteration = 1
    while (workingChannelName in threadChannelNameToThreadID) and (threadChannelNameToThreadID[workingChannelName] != jsonMessage["thread"]):
      iterationStr = "-" + str(iteration)
      workingChannelName = channelName[:80 - len(iterationStr)] + iterationStr
      iteration = iteration + 1
    channelName = workingChannelName
    threadChannelNameToThreadID[channelName] = jsonMessage["thread"]

    # Pick the text for the row based on which kind of payload the message carries.
    messageStr = "UNSUPPORTED"
    textBody = jsonMessage.get("text")
    linkBody = jsonMessage.get("link")
    fileBody = jsonMessage.get("file")
    quoteBody = jsonMessage.get("quote")
    integrationMessageBody = jsonMessage.get("integration_message")
    if jsonMessage.get("deleted"):
      messageStr = "This message was deleted."
    elif jsonMessage.get("moved"):
      continue
    elif textBody:
      messageStr = textBody["body"]
    elif linkBody:
      messageStr = linkBody["url"]
    elif fileBody:
      messageStr = fileBody["previewName"] + ": " + fileBody["link"]
    elif quoteBody:
      quotedMessages = quoteBody["quoted"]
      messageStr = "Quoted " + str(len(quotedMessages)) + " messages:"
      for quotedMessage in quotedMessages:
        quotedMessage["created"] = jsonMessage["created"] # Override the quoted message's timestamp so it sorts correctly
      # Quoted messages are already in ascending chron order; push them reversed
      # so the first quoted message is the next one taken off the stack.
      jsonMessages.extend(reversed(quotedMessages))
    elif integrationMessageBody:
      messageStr = integrationMessageBody["encoded"]

    # The username is escaped like the message text so a quote in a display
    # name cannot break the quoted CSV field.
    lines.append("\"" + str(unixSeconds) + "\",\"" + channelName + "\",\"" + _escapeCsvQuotes(username) + "\",\"" + _escapeCsvQuotes(messageStr) + "\"")

def lineSortKey(line):
  """Return the sort key for one generated CSV row.

  The key is the first 12 characters of the row: the opening quote followed
  by the leading characters of the stringified timestamp. Rows whose keys are
  equal keep their original relative order because Python's sort is stable.

  The comparison is textual, so it is chronological only while the timestamps
  have the same number of integer digits.
  """
  return line[:12] # Just the timestamp portion of the line, otherwise keep the original order.

if __name__ == '__main__':
  # Order the rows by timestamp before writing them out.
  try:
    lines = sorted(lines, key=lineSortKey)
  except Exception as e:
    # Best effort: the rows are still written in the order they were generated,
    # but the reason the sort failed is reported.
    print("Sorting exception: " + str(e))

  # Swap only a trailing ".zip" for ".csv". Replacing every "zip" in the path
  # could mangle directory names, and a path without "zip" would have made the
  # CSV overwrite the export itself.
  if zipFileName.lower().endswith(".zip"):
    outFileName = zipFileName[:-len(".zip")] + ".csv"
  else:
    outFileName = zipFileName + ".csv"

  # The context manager closes the file even if a write fails.
  with codecs.open(outFileName, 'w', encoding='utf-8') as outFile:
    for message in lines:
      outFile.write(message + "\n")

  print("Done transforming, results saved in " + outFileName)
  print("How to import this file into Slack:")
  print("- Go to Settings & Administration -> Workspace Settings")
  print("- At the top right of the screen there's an \"Import/Export Data\" button")
  print("- Choose the \"CSV/Text File\" option")
  print("- Chose this generated file as the \"Your CSV file\" and keep the default delimiter, click \"Start Import\"")
  print("\n")
  print("This import may take several days if you have a large Quill team with many threads/messages.")
  print("\n")
  print("Due to limitations of the Slack import tool, every Quill thread is turned into a separate Slack channel.  You may want to prune your Quill workspace before exporting (or manually remove lines from the CSV) to limit how many channels get created.  The user that does the import will be automatically subscribed to all of the imported channels, which makes the Slack app unusable if you have a very large number of channels.  We recommend creating a dummy account to do the import.")
	# Quill export -> Slack CSV converter (second copy of the script in this file).
import zipfile
import getopt
import sys
import re
import json
import codecs

# Command line handling: the only supported option is `-f <export zip>`.
try:
  opts, unparsedArgs = getopt.getopt(sys.argv[1:], "f:")
except getopt.GetoptError as err:
  # Unknown option, or `-f` given without a value: report it and exit with
  # the usage status instead of dumping a traceback.
  print(str(err))
  print("The `-f exportFile.zip` argument is required")
  sys.exit(2)

# Path of the export archive. If `-f` is repeated, the last value wins.
zipFileName = None
for o, a in opts:
  if o == "-f":
    zipFileName = a

# Truthiness check so that both a missing `-f` and an empty `-f ""` are rejected.
if not zipFileName:
  print("The `-f exportFile.zip` argument is required")
  sys.exit(2)

# The archive is kept open: the steps below and the message loop read members from it.
zf = zipfile.ZipFile(zipFileName)

# user id -> name written to the CSV: display name, else account name, else a placeholder.
userIDToUsername = {}
with zf.open("users.json") as f:
  jsonUsers = json.loads(f.read().decode("utf-8"))["users"]
  for jsonUser in jsonUsers:
    userIDToUsername[jsonUser["user"]] = jsonUser.get("displayName") or jsonUser.get("name") or "NoNameUser"

# channel id -> parent channel id ("" when the record has no parent), and channel id -> name.
channelIDToParentID = {}
channelNameByID = {}
with zf.open("channels.json") as f:
  jsonChannels = json.loads(f.read().decode("utf-8"))["channels"]
  for jsonChannel in jsonChannels:
    channelNameByID[jsonChannel["channel"]] = jsonChannel["name"]
    channelIDToParentID[jsonChannel["channel"]] = jsonChannel.get("parent") or ""

# Walk every "<prefix>-thread-messages-<n>.json" member of the archive and turn
# each message into one CSV row: "timestamp","channel","username","text".
files = zf.namelist()
lines = []
# Generated Slack channel name -> id of the Quill thread that owns that name.
threadChannelNameToThreadID = {}

# Raw string, escaped dot and end anchor so that only members really ending in
# "-thread-messages-<digits>.json" are picked up.
threadMessagesPattern = re.compile(r"(.*)-thread-messages-[0-9]*\.json$")


def _escapeCsvQuotes(text):
  # Backslash-escape double quotes. A quote that already had a backslash in
  # front of it ends up as three backslashes followed by the quote.
  return text.replace('"', '\\"').replace("\\\\\"", "\\\\\\\"")


for file in files:
  matched = threadMessagesPattern.match(file)
  if not matched:
    continue
  folderAndThreadID = matched.group(1)

  # Thread title comes from the sibling "<prefix>-thread.json" member.
  threadTitle = "UnknownThread"
  try:
    threadMember = zf.open(folderAndThreadID + "-thread.json")
  except KeyError:
    # ZipFile.open raises KeyError when the member is absent; keep the placeholder title.
    threadMember = None
  if threadMember is not None:
    with threadMember as f:
      thread = json.loads(f.read().decode("utf-8"))
    threadTitle = thread.get("title") or "UnnamedThread"

  with zf.open(file) as f:
    jsonMessages = json.loads(f.read().decode("utf-8"))["messages"]

  # We want to process these in ascending-chron order to match what the CSV expects.
  # The list is used as a stack: taking items from the end yields the same order
  # as reversing it and popping from the front, without shifting the list each time.
  while jsonMessages:
    jsonMessage = jsonMessages.pop()
    unixSeconds = jsonMessage["created"] / 1000
    username = userIDToUsername.get(jsonMessage["user"]) or "UnknownUser"

    channelName = channelNameByID.get(jsonMessage["channel"]) or "UnknownChannel"
    channelName = channelName + "-" + threadTitle
    channelName = channelName.replace(" ", "-").lower() # Make it look Slack-y
    channelName = channelName.replace("\"", "") # Remove any quotes so the csv is well-formatted
    channelName = channelName[:80] # Max channel name in Slack is 80 chars

    # If another thread already claimed this name, append "-1", "-2", ... (still
    # within 80 chars) until the name is free or already belongs to this thread.
    workingChannelName = channelName
    iteration = 1
    while (workingChannelName in threadChannelNameToThreadID) and (threadChannelNameToThreadID[workingChannelName] != jsonMessage["thread"]):
      iterationStr = "-" + str(iteration)
      workingChannelName = channelName[:80 - len(iterationStr)] + iterationStr
      iteration = iteration + 1
    channelName = workingChannelName
    threadChannelNameToThreadID[channelName] = jsonMessage["thread"]

    # Pick the text for the row based on which kind of payload the message carries.
    messageStr = "UNSUPPORTED"
    textBody = jsonMessage.get("text")
    linkBody = jsonMessage.get("link")
    fileBody = jsonMessage.get("file")
    quoteBody = jsonMessage.get("quote")
    integrationMessageBody = jsonMessage.get("integration_message")
    if jsonMessage.get("deleted"):
      messageStr = "This message was deleted."
    elif jsonMessage.get("moved"):
      continue
    elif textBody:
      messageStr = textBody["body"]
    elif linkBody:
      messageStr = linkBody["url"]
    elif fileBody:
      messageStr = fileBody["previewName"] + ": " + fileBody["link"]
    elif quoteBody:
      quotedMessages = quoteBody["quoted"]
      messageStr = "Quoted " + str(len(quotedMessages)) + " messages:"
      for quotedMessage in quotedMessages:
        quotedMessage["created"] = jsonMessage["created"] # Override the quoted message's timestamp so it sorts correctly
      # Quoted messages are already in ascending chron order; push them reversed
      # so the first quoted message is the next one taken off the stack.
      jsonMessages.extend(reversed(quotedMessages))
    elif integrationMessageBody:
      messageStr = integrationMessageBody["encoded"]

    # The username is escaped like the message text so a quote in a display
    # name cannot break the quoted CSV field.
    lines.append("\"" + str(unixSeconds) + "\",\"" + channelName + "\",\"" + _escapeCsvQuotes(username) + "\",\"" + _escapeCsvQuotes(messageStr) + "\"")


def lineSortKey(line):
  """Return the sort key for one generated CSV row.

  The key is the first 12 characters of the row: the opening quote followed
  by the leading characters of the stringified timestamp. Rows whose keys are
  equal keep their original relative order because Python's sort is stable.
  """
  return line[:12] # Just the timestamp portion of the line, otherwise keep the original order.


if __name__ == '__main__':
  # Order the rows by timestamp before writing them out.
  try:
    lines = sorted(lines, key=lineSortKey)
  except Exception as e:
    # Best effort: the rows are still written in the order they were generated,
    # but the reason the sort failed is reported.
    print("Sorting exception: " + str(e))

  # Swap only a trailing ".zip" for ".csv". Replacing every "zip" in the path
  # could mangle directory names, and a path without "zip" would have made the
  # CSV overwrite the export itself.
  if zipFileName.lower().endswith(".zip"):
    outFileName = zipFileName[:-len(".zip")] + ".csv"
  else:
    outFileName = zipFileName + ".csv"

  # The context manager closes the file even if a write fails.
  with codecs.open(outFileName, 'w', encoding='utf-8') as outFile:
    for message in lines:
      outFile.write(message + "\n")

  print("Done transforming, results saved in " + outFileName)
  print("How to import this file into Slack:")
  print("- Go to Settings & Administration -> Workspace Settings")
  print("- At the top right of the screen there's an \"Import/Export Data\" button")
  print("- Choose the \"CSV/Text File\" option")
  print("- Chose this generated file as the \"Your CSV file\" and keep the default delimiter, click \"Start Import\"")
  print("\n")
  print("This import may take several days if you have a large Quill team with many threads/messages.")
  print("\n")
  print("Due to limitations of the Slack import tool, every Quill thread is turned into a separate Slack channel. You may want to prune your Quill workspace before exporting (or manually remove lines from the CSV) to limit how many channels get created. The user that does the import will be automatically subscribed to all of the imported channels, which makes the Slack app unusable if you have a very large number of channels. We recommend creating a dummy account to do the import.")