Skip to content

Instantly share code, notes, and snippets.

@timmolderez
Last active February 5, 2021 18:10
Show Gist options
  • Save timmolderez/2a6ffa4efa04bbf15c7a411b43f55a9c to your computer and use it in GitHub Desktop.
Save timmolderez/2a6ffa4efa04bbf15c7a411b43f55a9c to your computer and use it in GitHub Desktop.
Slack - generate a wordcloud per user from a Slack chat export
"""
Restructures Slack chat export files so that the messages are now grouped per user
(normally the export is grouped per channel)
Not actually useful or anything :) Mainly just for fun so you can generate a
wordcloud per user, then play the "guess whose wordcloud is this?"-game
Usage: fill in INPUT_DIR and OUTPUT_DIR, then run the script.
- See this R script to create wordclouds from a chat export:
https://github.com/codeandsupply/chat-word-cloud
(If you want a wordcloud per channel, you can directly use that R script.
If you want one per user, run this Python script first.
Use `setwd()` to choose which folder/user to generate a wordcloud for.)
- See this page on how admins can export chat history:
https://slack.com/intl/en-be/help/articles/201658943-Export-your-workspace-data
"""
import json
import os
from typing import Dict, Optional, Union
# Folder containing the (unzipped) chat export.
# The '~' prefix is expanded here because os.scandir(), open() and
# os.makedirs() treat it as a literal directory name.
INPUT_DIR = os.path.expanduser('~/Desktop/slack-export/')
# Folder where the restructured data will be stored.
OUTPUT_DIR = os.path.expanduser('~/Desktop/slack-export-per-user/')
def restructure_workspace_export_per_user() -> None:
    """Regroup the messages of the whole workspace export by user.

    Every sub-directory of INPUT_DIR is handed to
    restructure_channel_export_per_user(); entries that are not directories
    are ignored. Once all of them have been processed, close_output_files()
    is called to finish the generated files.
    """
    # Lazy filter: each directory is processed as soon as it is discovered.
    channel_dirs = (entry for entry in os.scandir(INPUT_DIR) if entry.is_dir())
    for channel_dir in channel_dirs:
        restructure_channel_export_per_user(channel_dir)
    close_output_files()
def restructure_channel_export_per_user(
        channel_dir: Union[str, 'os.PathLike[str]']) -> None:
    """Pass every message of one channel's export to process_message_dict().

    Only regular files whose name ends in '.json' are read; sub-directories
    and other files in the folder are skipped instead of aborting the run.

    Args:
        channel_dir: Folder of a single channel. A path string or any
            path-like object (such as an os.DirEntry) is accepted.

    Raises:
        json.JSONDecodeError: If one of the '.json' files is not valid JSON.
    """
    json_files = [
        entry for entry in os.scandir(channel_dir)
        if entry.is_file() and entry.name.lower().endswith('.json')
    ]
    # os.scandir() yields entries in arbitrary order; sorting by file name
    # makes the order of the written messages reproducible between runs.
    json_files.sort(key=lambda entry: entry.name)
    for json_file in json_files:
        with open(json_file, 'r', encoding='utf-8') as f:
            all_messages = json.load(f)
        # The input file is already closed while its messages are written out
        for msg in all_messages:
            process_message_dict(msg)
def process_message_dict(message: Dict,
                         output_dir: Optional[str] = None) -> None:
    """Append one message to the `all_messages.json` file of its author.

    The file is built incrementally: it starts with '[' and every message is
    followed by a comma and a newline, so it is not valid JSON until that
    trailing separator has been replaced by the closing ']'.

    NOTE: files are opened in append mode, so running this against a
    non-empty output folder adds to whatever is already in those files.

    Args:
        message: One message object from an export file. Messages without a
            'user_profile' entry are skipped.
        output_dir: Folder in which the per-user folders are created.
            Defaults to OUTPUT_DIR.
    """
    profile = message.get('user_profile')
    if not profile:
        # Skip messages sent by a bot
        return

    # 'display_name' may be an empty string; the folder name would then be
    # empty and the file would land directly in the output folder.
    # NOTE(review): the fallback keys are assumed to exist in the export's
    # profile/message objects - confirm against a real export.
    user_name = (profile.get('display_name') or profile.get('real_name')
                 or profile.get('name') or message.get('user') or 'unknown')
    # Keep the name a single path component so it can neither create nested
    # folders nor point outside of the output folder.
    folder_name = user_name.replace('/', '_').replace('\\', '_')
    if folder_name in ('.', '..'):
        folder_name = 'unknown'

    base_dir = OUTPUT_DIR if output_dir is None else output_dir
    out_dir = os.path.join(base_dir, folder_name)
    out_file = os.path.join(out_dir, 'all_messages.json')
    os.makedirs(out_dir, exist_ok=True)

    contents = json.dumps(message) + ',\n'
    if not os.path.isfile(out_file):
        # First message of this user: open the JSON array
        contents = '[\n' + contents
    with open(out_file, 'a', encoding='utf-8', newline='\n') as f:
        f.write(contents)
def close_output_files(output_dir: Optional[str] = None) -> None:
    """Wrap up all of the output files so they're valid JSON.

    For every sub-folder that contains an `all_messages.json`, the trailing
    comma + newline left behind by the incremental writes is replaced by a
    newline and the closing ']'. A file that does not end in that separator
    (for instance because it was already closed) is left untouched, so
    calling this function twice is harmless. A `dummy.json` holding an empty
    array is written next to each processed file.

    Entries that are not directories, folders without an
    `all_messages.json`, and a missing output folder are silently skipped.

    Args:
        output_dir: Folder that contains the per-user folders.
            Defaults to OUTPUT_DIR.
    """
    base_dir = OUTPUT_DIR if output_dir is None else output_dir
    if not os.path.isdir(base_dir):
        # Nothing has been written, so there is nothing to close
        return

    for user_dir in os.scandir(base_dir):
        if not user_dir.is_dir():
            continue
        messages_path = os.path.join(user_dir.path, 'all_messages.json')
        if not os.path.isfile(messages_path):
            continue

        # Binary mode: seeking to a computed offset is only well-defined for
        # binary files. The file is UTF-8 and the separator is pure ASCII.
        with open(messages_path, 'rb+') as f:
            f.seek(0, os.SEEK_END)
            size = f.tell()
            if size >= 2:
                f.seek(size - 2)
                if f.read(2) == b',\n':
                    # Removes the last ',\n'
                    f.seek(size - 2)
                    f.truncate()
                    # Add the closing ']'
                    f.write(b'\n]')

        # Workaround for a bug in the wordcloud R script; it only works
        # when there's more than one .json file, so just tossing in an
        # empty one..
        dummy_path = os.path.join(user_dir.path, 'dummy.json')
        with open(dummy_path, 'w', encoding='utf-8', newline='\n') as f:
            f.write('[]\n')
# Script entry point: this module-level call runs the whole restructuring
# as soon as the file is executed (or imported).
restructure_workspace_export_per_user()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment