colmanhumphrey/export-transform-to-slack-csv.py

## export-transform-to-slack-csv.py
#!/usr/bin/env python3

import zipfile
import getopt
import sys
import re
import json
import codecs

# Command line: the only supported option is `-f <export zip>`.
try:
  opts, unparsedArgs = getopt.getopt(sys.argv[1:], "f:")
except getopt.GetoptError as err:
  # Unknown option, or `-f` given without a value: report it and exit with the
  # same status as a missing argument instead of dying with a traceback.
  print(err)
  print("The `-f exportFile.zip` argument is required")
  sys.exit(2)

zipFileName = None
for o, a in opts:
  if o == "-f":
    zipFileName = a

if zipFileName is None:
  print("The `-f exportFile.zip` argument is required")
  sys.exit(2)

# Open the export archive. It is left open here because the message files are
# read from it later in the script.
zf = zipfile.ZipFile(zipFileName)

# users.json: map each user id to the name used in the CSV, preferring
# "displayName", then "name", then the "NoNameUser" placeholder.
with zf.open("users.json") as f:
  jsonUsers = json.loads(f.read().decode("utf-8"))["users"]
userIDToUsername = {
  user["user"]: user.get("displayName") or user.get("name") or "NoNameUser"
  for user in jsonUsers
}

# channels.json: map each channel id to its name and to its parent id ("" when absent).
with zf.open("channels.json") as f:
  jsonChannels = json.loads(f.read().decode("utf-8"))["channels"]
channelNameByID = {channel["channel"]: channel["name"] for channel in jsonChannels}
# The parent map is not read anywhere in the visible script.
channelIDToParentID = {channel["channel"]: channel.get("parent") or "" for channel in jsonChannels}

files = zf.namelist()
lines = []  # Finished CSV rows (no trailing newline); sorted and written out at the end of the script.
threadChannelNameToThreadID = {}  # Not read anywhere in the visible script.

# Walk every "<folder/threadID>-thread-messages-<N>.json" member of the archive and turn
# its messages into CSV rows of the form "timestamp","channel","username","text".
# NOTE(review): the "_New thread_" header row is emitted once per messages file, so a thread
# split across several "-thread-messages-<N>.json" files presumably gets several headers;
# confirm against a real export.
for file in files:
  # Raw pattern, escaped dot, full match: only names that really end in ".json" qualify.
  matched = re.fullmatch(r"(.*)-thread-messages-[0-9]*\.json", file)
  if not matched:
    continue

  folderAndThreadID = matched.group(1)
  threadTitle = "UnknownThread"
  try:
    with zf.open(folderAndThreadID + "-thread.json") as f:
      thread = json.loads(f.read().decode("utf-8"))
      threadTitle = thread.get("title") or "UnnamedThread"
  except KeyError:
    # ZipFile.open raises KeyError when the archive has no such member; keep the
    # "UnknownThread" placeholder instead of aborting the whole conversion.
    pass

  # The header title is the same for every message of the thread, so clean it up once.
  workingThreadTitle = threadTitle.replace("\"", "") # Remove any quotes so the csv is well-formatted
  workingThreadTitle = workingThreadTitle.replace("\n", " ") # Replace newlines for a flat look
  workingThreadTitle = workingThreadTitle[:120] # Avoid excessive titles...

  first = True
  with zf.open(file) as f:
    jsonMessages = json.loads(f.read().decode("utf-8"))["messages"]
  # We want to process these in ascending-chron order to match what the CSV expects
  jsonMessages.reverse()
  while jsonMessages:
    jsonMessage = jsonMessages.pop(0)
    unixSeconds = round(jsonMessage["created"] / 1000) # presumably "created" is in milliseconds - TODO confirm
    username = userIDToUsername.get(jsonMessage["user"]) or "UnknownUser"
    username = username.replace("\"", "") # Remove any quotes so the csv is well-formatted, as for the other fields
    channelName = channelNameByID.get(jsonMessage["channel"]) or "UnknownChannel"

    channelName = channelName.replace(" ", "-") # Make it look Slack-y
    channelName = channelName.replace("\"", "") # Remove any quotes so the csv is well-formatted
    channelName = channelName.lower()[:80] # Max channel name in Slack is 80 chars

    # Messages whose channel id is not in channels.json are dropped.
    if channelName == "unknownchannel":
      continue

    if first:
      # Announce the thread one second before its first processed message so the
      # header sorts ahead of that message.
      lines.append("\"" + str(unixSeconds - 1) + "\",\"" + channelName + "\",\"new-quill-thread\",\"_New thread_\n*" + workingThreadTitle + "*\"")
      first = False

    # Pick the text for this row from whichever body the message carries.
    messageStr = "UNSUPPORTED"
    textBody = jsonMessage.get("text")
    linkBody = jsonMessage.get("link")
    fileBody = jsonMessage.get("file")
    quoteBody = jsonMessage.get("quote")
    integrationMessageBody = jsonMessage.get("integration_message")
    if jsonMessage.get("deleted"):
      continue
    elif jsonMessage.get("moved"):
      continue
    elif textBody:
      messageStr = textBody["body"]
    elif linkBody:
      messageStr = linkBody["url"]
    elif fileBody:
      messageStr = fileBody["previewName"] + ": " + fileBody["link"]
    elif quoteBody:
      quotedMessages = quoteBody["quoted"]
      messageStr = "Quoted " + str(len(quotedMessages)) + " messages:"
      for quotedMessage in quotedMessages:
        quotedMessage["created"] = jsonMessage["created"] # Override the quoted message's timestamp so it sorts correctly
      # Quoted messages are already in ascending chron order so just put them at the beginning of the list and they'll get popped next.
      jsonMessages = quotedMessages + jsonMessages
    elif integrationMessageBody:
      messageStr = integrationMessageBody["encoded"]

    # Backslash-escape every double quote; a quote that was already preceded by a
    # backslash in the input (now two backslashes + quote) gets one more backslash.
    messageStr = messageStr.replace('"', '\\"').replace("\\\\\"", "\\\\\\\"")
    lines.append("\"" + str(unixSeconds) + "\",\"" + channelName + "\",\"" + username + "\",\"" + messageStr + "\"")

def lineSortKey(line):
  """Return the sort key for a finished CSV row: its first 12 characters.

  Rows built by this script start with the quoted timestamp, so for a
  ten-digit timestamp the key is exactly that quoted value. Rows with equal
  keys keep their original relative order when passed through sorted().
  """
  keyLength = 12
  return line[0:keyLength]

if __name__ == '__main__':
  # Order the rows by their timestamp prefix; sorted() is stable, so rows with
  # the same key stay in the order they were generated.
  try:
    lines = sorted(lines, key=lineSortKey)
  except Exception as e:
    # Best effort: continue with the unsorted rows, but say what went wrong.
    print("Sorting exception: " + str(e))

  # Swap only a trailing ".zip" for ".csv". A plain substring replace would also
  # rewrite "zip" elsewhere in the path, and for a name without "zip" it would
  # return the input path itself, so the archive would be overwritten.
  if zipFileName.lower().endswith(".zip"):
    outFileName = zipFileName[:-len(".zip")] + ".csv"
  else:
    outFileName = zipFileName + ".csv"

  # The context manager closes the file even if a write fails.
  with codecs.open(outFileName, 'w', encoding='utf-8') as outFile:
    for message in lines:
      outFile.write(message + "\n")

  print("Done transforming, results saved in " + outFileName)
  print("How to import this file into Slack:")
  print("- Go to Settings & Administration -> Workspace Settings")
  print("- At the top right of the screen there's an \"Import/Export Data\" button")
  print("- Choose the \"CSV/Text File\" option")
  print("- Chose this generated file as the \"Your CSV file\" and keep the default delimiter, click \"Start Import\"")
  print("\n")
  print("This import may take several days if you have a large Quill team with many threads/messages.")
  print("\n")
  print("This version puts all threads within their channel, with a new user `new-quill-thread` posting the thread name prior to the thread starting.")
	#!/usr/bin/env python3

	import zipfile
	import getopt
	import sys
	import re
	import json
	import codecs

	# Command line: the only supported option is `-f <export zip>`.
	try:
	  opts, unparsedArgs = getopt.getopt(sys.argv[1:], "f:")
	except getopt.GetoptError as err:
	  # Unknown option, or `-f` given without a value: report it and exit with the
	  # same status as a missing argument instead of dying with a traceback.
	  print(err)
	  print("The `-f exportFile.zip` argument is required")
	  sys.exit(2)

	zipFileName = None
	for o, a in opts:
	  if o == "-f":
	    zipFileName = a

	if zipFileName is None:
	  print("The `-f exportFile.zip` argument is required")
	  sys.exit(2)

	# Open the export archive. It is left open here because the message files are
	# read from it later in the script.
	zf = zipfile.ZipFile(zipFileName)

	# users.json: map each user id to the name used in the CSV, preferring
	# "displayName", then "name", then the "NoNameUser" placeholder.
	userIDToUsername = {}
	with zf.open("users.json") as f:
	  jsonUsers = json.loads(f.read().decode("utf-8"))["users"]
	  for jsonUser in jsonUsers:
	    userIDToUsername[jsonUser["user"]] = jsonUser.get("displayName") or jsonUser.get("name") or "NoNameUser"

	# channels.json: map each channel id to its name and to its parent id ("" when absent).
	channelIDToParentID = {}  # Not read anywhere in the visible script.
	channelNameByID = {}
	with zf.open("channels.json") as f:
	  jsonChannels = json.loads(f.read().decode("utf-8"))["channels"]
	  for jsonChannel in jsonChannels:
	    channelNameByID[jsonChannel["channel"]] = jsonChannel["name"]
	    channelIDToParentID[jsonChannel["channel"]] = jsonChannel.get("parent") or ""

	files = zf.namelist()
	lines = []  # Finished CSV rows (no trailing newline); sorted and written out at the end of the script.
	threadChannelNameToThreadID = {}  # Not read anywhere in the visible script.

	# Walk every "<folder/threadID>-thread-messages-<N>.json" member of the archive and turn
	# its messages into CSV rows of the form "timestamp","channel","username","text".
	# NOTE(review): the "_New thread_" header row is emitted once per messages file, so a thread
	# split across several "-thread-messages-<N>.json" files presumably gets several headers;
	# confirm against a real export.
	for file in files:
	  # Raw pattern, escaped dot, full match: only names that really end in ".json" qualify.
	  matched = re.fullmatch(r"(.*)-thread-messages-[0-9]*\.json", file)
	  if not matched:
	    continue

	  folderAndThreadID = matched.group(1)
	  threadTitle = "UnknownThread"
	  try:
	    with zf.open(folderAndThreadID + "-thread.json") as f:
	      thread = json.loads(f.read().decode("utf-8"))
	      threadTitle = thread.get("title") or "UnnamedThread"
	  except KeyError:
	    # ZipFile.open raises KeyError when the archive has no such member; keep the
	    # "UnknownThread" placeholder instead of aborting the whole conversion.
	    pass

	  # The header title is the same for every message of the thread, so clean it up once.
	  workingThreadTitle = threadTitle.replace("\"", "") # Remove any quotes so the csv is well-formatted
	  workingThreadTitle = workingThreadTitle.replace("\n", " ") # Replace newlines for a flat look
	  workingThreadTitle = workingThreadTitle[:120] # Avoid excessive titles...

	  first = True
	  with zf.open(file) as f:
	    jsonMessages = json.loads(f.read().decode("utf-8"))["messages"]
	  # We want to process these in ascending-chron order to match what the CSV expects
	  jsonMessages.reverse()
	  while jsonMessages:
	    jsonMessage = jsonMessages.pop(0)
	    unixSeconds = round(jsonMessage["created"] / 1000) # presumably "created" is in milliseconds - TODO confirm
	    username = userIDToUsername.get(jsonMessage["user"]) or "UnknownUser"
	    username = username.replace("\"", "") # Remove any quotes so the csv is well-formatted, as for the other fields
	    channelName = channelNameByID.get(jsonMessage["channel"]) or "UnknownChannel"

	    channelName = channelName.replace(" ", "-") # Make it look Slack-y
	    channelName = channelName.replace("\"", "") # Remove any quotes so the csv is well-formatted
	    channelName = channelName.lower()[:80] # Max channel name in Slack is 80 chars

	    # Messages whose channel id is not in channels.json are dropped.
	    if channelName == "unknownchannel":
	      continue

	    if first:
	      # Announce the thread one second before its first processed message so the
	      # header sorts ahead of that message.
	      lines.append("\"" + str(unixSeconds - 1) + "\",\"" + channelName + "\",\"new-quill-thread\",\"_New thread_\n*" + workingThreadTitle + "*\"")
	      first = False

	    # Pick the text for this row from whichever body the message carries.
	    messageStr = "UNSUPPORTED"
	    textBody = jsonMessage.get("text")
	    linkBody = jsonMessage.get("link")
	    fileBody = jsonMessage.get("file")
	    quoteBody = jsonMessage.get("quote")
	    integrationMessageBody = jsonMessage.get("integration_message")
	    if jsonMessage.get("deleted"):
	      continue
	    elif jsonMessage.get("moved"):
	      continue
	    elif textBody:
	      messageStr = textBody["body"]
	    elif linkBody:
	      messageStr = linkBody["url"]
	    elif fileBody:
	      messageStr = fileBody["previewName"] + ": " + fileBody["link"]
	    elif quoteBody:
	      quotedMessages = quoteBody["quoted"]
	      messageStr = "Quoted " + str(len(quotedMessages)) + " messages:"
	      for quotedMessage in quotedMessages:
	        quotedMessage["created"] = jsonMessage["created"] # Override the quoted message's timestamp so it sorts correctly
	      # Quoted messages are already in ascending chron order so just put them at the beginning of the list and they'll get popped next.
	      jsonMessages = quotedMessages + jsonMessages
	    elif integrationMessageBody:
	      messageStr = integrationMessageBody["encoded"]

	    # Backslash-escape every double quote; a quote that was already preceded by a
	    # backslash in the input (now two backslashes + quote) gets one more backslash.
	    messageStr = messageStr.replace('"', '\\"').replace("\\\\\"", "\\\\\\\"")
	    lines.append("\"" + str(unixSeconds) + "\",\"" + channelName + "\",\"" + username + "\",\"" + messageStr + "\"")

	def lineSortKey(line):
	  """Return the sort key for a finished CSV row: its first 12 characters.

	  Rows built by this script start with the quoted timestamp, so for a
	  ten-digit timestamp the key is exactly that quoted value. Rows with equal
	  keys keep their original relative order when passed through sorted().
	  """
	  return line[:12]

	if __name__ == '__main__':
	  # Order the rows by their timestamp prefix; sorted() is stable, so rows with
	  # the same key stay in the order they were generated.
	  try:
	    lines = sorted(lines, key=lineSortKey)
	  except Exception as e:
	    # Best effort: continue with the unsorted rows, but say what went wrong.
	    print("Sorting exception: " + str(e))

	  # Swap only a trailing ".zip" for ".csv". A plain substring replace would also
	  # rewrite "zip" elsewhere in the path, and for a name without "zip" it would
	  # return the input path itself, so the archive would be overwritten.
	  if zipFileName.lower().endswith(".zip"):
	    outFileName = zipFileName[:-len(".zip")] + ".csv"
	  else:
	    outFileName = zipFileName + ".csv"

	  # The context manager closes the file even if a write fails.
	  with codecs.open(outFileName, 'w', encoding='utf-8') as outFile:
	    for message in lines:
	      outFile.write(message + "\n")

	  print("Done transforming, results saved in " + outFileName)
	  print("How to import this file into Slack:")
	  print("- Go to Settings & Administration -> Workspace Settings")
	  print("- At the top right of the screen there's an \"Import/Export Data\" button")
	  print("- Choose the \"CSV/Text File\" option")
	  print("- Chose this generated file as the \"Your CSV file\" and keep the default delimiter, click \"Start Import\"")
	  print("\n")
	  print("This import may take several days if you have a large Quill team with many threads/messages.")
	  print("\n")
	  print("This version puts all threads within their channel, with a new user `new-quill-thread` posting the thread name prior to the thread starting.")