Skip to content

Instantly share code, notes, and snippets.

@dkapila
Last active October 1, 2021 21:22
Show Gist options
  • Save dkapila/479232696644bf6e8711bc6e2a752174 to your computer and use it in GitHub Desktop.
Prepare Slack Export for Roam/Logseq/Obsidian
import pandas as pd
import json
import glob
import os
from pathlib import Path
import re
import datetime
from datetime import timezone
import pytz
# --- Configuration ---------------------------------------------------------
SLACK_EXPORT_FROM_PATH = './slack_export'  # root folder of the unzipped Slack export
SLACK_EXPORT_TO_PATH = './output'  # destination folder for the generated .md files
USE_CLOZE_SYNTAX_FOR_USERNAMES = False  # if True, render usernames as Roam cloze: {{=: @name | Real Name}}
USE_CARDINAL_DATES = False  # if True, name files like "October 1st, 2021" instead of "2021_10_01"
# --- Module state (filled in by run()) -------------------------------------
combined_data = []  # rows of [date-string, channel-name, message-list], one per daily JSON file
user_names = {}  # user id -> {'name': ..., 'real_name': ...}, built by set_user_names()
df_copy = pd.DataFrame(columns=['Date', 'Channel', 'Data'])  # global frame queried by get_replies()
## List of available timezones: https://stackoverflow.com/questions/13866926/is-there-a-list-of-pytz-timezones
slack_time_zone = pytz.timezone('America/Los_Angeles')  # timezone used to date-bucket message timestamps
user_ids_to_remove = {}  # user ids whose messages/replies are skipped (e.g. bots)
def run():
    """Drive the full export: read channel/user metadata, aggregate every
    daily message file, then write one markdown page per date.

    Side effects: populates the module globals ``combined_data``,
    ``user_names`` and ``df_copy``, and writes .md files under
    SLACK_EXPORT_TO_PATH.
    """
    print("Getting Channel names...")
    channel_names = get_channel_names()
    print("Getting User names...")
    set_user_names()
    print("Preparing Data...")
    for channel in channel_names:  # idx from the original enumerate was unused
        add_daily_comments_from_channel(channel)
    print("Creating Markdown Pages...")
    global df_copy
    df_copy = pd.DataFrame(combined_data, columns=['Date', 'Channel', 'Data'])
    # The original built a second, identical DataFrame just to group it;
    # grouping the shared frame directly avoids the redundant copy.
    df_copy.groupby('Date').apply(buildDailyPages)
    print("Export complete. All markdown files are in " + SLACK_EXPORT_TO_PATH)
def get_channel_names():
    """Return the list of channel names from channels.json in the Slack export.

    Returns:
        list[str]: the 'name' field of every channel entry.
    """
    json_file_path = SLACK_EXPORT_FROM_PATH + '/channels.json'
    with open(json_file_path, 'r') as j:
        contents = json.load(j)  # json.load reads the file handle directly
    return [channel['name'] for channel in contents]
def set_user_names():
    """Populate the global ``user_names`` map (user id -> name/real_name)
    from users.json in the Slack export.

    Side effects: mutates the module-level ``user_names`` dict.
    """
    json_file_path = SLACK_EXPORT_FROM_PATH + '/users.json'
    with open(json_file_path, 'r') as j:
        contents = json.load(j)
    for user in contents:
        # 'real_name' is optional in Slack exports; fall back to ''.
        user_names[user['id']] = {
            'name': user['name'],
            'real_name': user.get('real_name', ''),
        }
def add_daily_comments_from_channel(channel):
    """Append one [date, channel, messages] row to ``combined_data`` for
    every daily JSON file in the channel's export folder.

    Args:
        channel: channel folder name inside SLACK_EXPORT_FROM_PATH.
    Side effects: mutates the module-level ``combined_data`` list.
    """
    files_in_channel = glob.glob(SLACK_EXPORT_FROM_PATH + "/" + channel + "/*.json")
    for file_path in files_in_channel:  # idx from the original enumerate was unused
        with open(file_path, 'r') as j:
            # Slack names each daily file YYYY-MM-DD.json; the stem is the date key.
            file_name = Path(file_path).stem
            contents = json.load(j)
        combined_data.append([file_name, channel, contents])
def clean_up_text(text, user_id):
    """Prefix a message with its author and expand raw <@U…> mentions.

    Args:
        text: raw Slack message text.
        user_id: Slack id of the message author.
    Returns:
        A single-line string like ``name(Real Name) : message text`` (or the
        cloze form when USE_CLOZE_SYNTAX_FOR_USERNAMES is set). Newlines are
        stripped so each message stays on one outline bullet.
    """
    user_name_text = ''
    if user_id in user_names:
        user_name = user_names[user_id]['name']
        real_name = user_names[user_id]['real_name']
        if USE_CLOZE_SYNTAX_FOR_USERNAMES:
            user_name_text = "{{=: @" + user_name + " | " + real_name + "}} : "
        else:
            user_name_text = user_name + "(" + real_name + ") : "
    # Expand mention tokens like <@U123ABC> embedded in the body text.
    # Locals are named mention_* to avoid shadowing the user_id parameter,
    # which the original loop overwrote.
    for mention in re.findall(r"\<@U([A-Za-z0-9_]+)\>", text):
        replacement = ''  # unknown users: strip the mention token entirely
        mention_id = 'U' + mention
        if mention_id in user_names:
            mention_name = user_names[mention_id]['name']
            mention_real = user_names[mention_id]['real_name']
            if USE_CLOZE_SYNTAX_FOR_USERNAMES:
                replacement = "{{=: @" + mention_name + " | " + mention_real + "}}"
            else:
                # Bug fix: the non-cloze branch appended a trailing " : " to
                # inline mentions (copy-paste from the author prefix); a
                # mention inside a sentence should not carry the colon, and
                # the cloze branch never did.
                replacement = mention_name + "(" + mention_real + ")"
        text = text.replace('<@' + mention_id + '>', replacement)
    return user_name_text + text.strip().replace('\n', '')
def get_replies(replies, channel_name):
    """Resolve thread-reply stubs to their full message dicts.

    Slack stores only ``{'user', 'ts'}`` stubs under a parent message's
    'replies' key; the full reply bodies live in that day's message list.
    This looks each stub up in the global ``df_copy`` frame by date and
    channel, then matches on the (ts, user) pair.

    Args:
        replies: list of reply stubs, each with 'ts' and 'user'.
        channel_name: channel the parent message belongs to.
    Returns:
        list of full message dicts, skipping users in ``user_ids_to_remove``.
    """
    replies_on_post = []
    for reply in replies:
        ts = reply['ts']
        # Slack 'ts' is epoch seconds; bucket it to a date in the export's timezone.
        date_ts = datetime.datetime.fromtimestamp(float(ts), tz=slack_time_zone).strftime("%Y-%m-%d")
        user = reply['user']
        if user in user_ids_to_remove:
            continue
        # Narrow the global frame to this date, then this channel.
        df = df_copy[(df_copy['Date'] == date_ts)]
        json_arr = df[(df['Channel'] == channel_name)]['Data']
        if (json_arr.empty):
            continue
        # json_arr.iloc[0] is that day's raw message list for the channel;
        # a reply is identified by its exact (ts, user) pair.
        for message in json_arr.iloc[0]:
            if ('ts' in message and 'user' in message and message['ts'] == ts and message['user'] == user):
                replies_on_post.append(message)
    return replies_on_post
def add_blocks(daily_note_file_path, channel_name, data):
    """Append one channel's section to a daily markdown file.

    Writes a ``- [[channel]]`` heading bullet, then one indented bullet per
    top-level message, then bullets for any thread replies.

    Args:
        daily_note_file_path: path of the day's .md file (opened in append mode).
        channel_name: channel whose messages are being written.
        data: that day's raw message list for the channel.
    """
    with open(daily_note_file_path, 'a') as file:
        file.write('- ' + '[[' + channel_name + ']]' + '\n')
        for block in data:
            # Skip thread replies (they are emitted under their parent),
            # system messages (joins, etc.), and entries with no author.
            if('parent_user_id' in block) or ('subtype' in block) or ('user' not in block):
                continue
            user_id = block['user']
            if user_id in user_ids_to_remove:
                continue
            text = clean_up_text(block['text'], block['user'])
            spacing = ' - '
            file.write(spacing + text + '\n')
            if ('replies' in block):
                # NOTE(review): both spacing values read ' - ' here, making the
                # reassignment a no-op; the original likely used a deeper indent
                # for replies and the in-string whitespace may have been lost in
                # transcription — verify against the upstream gist.
                spacing = ' - '
                replies = get_replies(block['replies'], channel_name)
                for reply_block in replies:
                    text = clean_up_text(reply_block['text'], reply_block['user'])
                    file.write(spacing + text + '\n')
def make_ordinal(n):
    """Return *n* as an ordinal string, e.g. 3 -> '3rd', 12 -> '12th'.

    Accepts anything int() can convert (int or numeric string).
    """
    n = int(n)
    # 11th, 12th, 13th are irregular: the teens always take 'th'.
    if 11 <= n % 100 <= 13:
        return str(n) + 'th'
    irregular = {1: 'st', 2: 'nd', 3: 'rd'}
    return str(n) + irregular.get(n % 10, 'th')
def get_roam_date_page_from_date(date):
    """Convert an ISO date into a Roam-style page title.

    Args:
        date: 'YYYY-MM-DD' string.
    Returns:
        e.g. '2021-10-01' -> 'October 1st, 2021'.
    """
    date_time_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
    full_month_name = date_time_obj.strftime("%B")
    year = date_time_obj.strftime("%Y")
    # .day is already an unpadded int; the original reconstructed it from
    # strftime(" %d") with fragile replace() calls to strip zero padding.
    day = make_ordinal(date_time_obj.day)
    return full_month_name + " " + day + ", " + year
def get_date_string(date):
    """Map an ISO 'YYYY-MM-DD' date to the form used for output filenames:
    a Roam cardinal page title when USE_CARDINAL_DATES is set, otherwise
    the underscore form 'YYYY_MM_DD'."""
    if USE_CARDINAL_DATES:
        return get_roam_date_page_from_date(date)
    return date.replace("-", "_")
def add_notes_to_day_page(df):
    """Append one channel's messages for a single day to that day's markdown file.

    Called via groupby('Channel').apply, so every row of *df* shares the same
    channel and date; the first row supplies both plus the raw message list.

    Args:
        df: single-group DataFrame with 'Date', 'Channel', 'Data' columns.
    """
    channel_name = df['Channel'].iloc[0]
    date = get_date_string(df['Date'].iloc[0])
    data = df['Data'].iloc[0]
    daily_note_file_path = SLACK_EXPORT_TO_PATH + '/' + date + '.md'
    # add_blocks opens the file itself; the original wrapped this call in a
    # second open() whose handle was never used.
    add_blocks(daily_note_file_path, channel_name, data)
def buildDailyPages(df):
    """Create the markdown file for one date, then fill it channel by channel.

    Called via groupby('Date').apply; *df* holds every channel's rows for a
    single date. The file is truncated first because add_notes_to_day_page
    (via add_blocks) appends to it.
    """
    date = get_date_string(df['Date'].iloc[0])
    print("Creating file: " + date + ".md")
    daily_note_file_path = SLACK_EXPORT_TO_PATH + '/' + date + '.md'
    # Truncate any previous export for this day before the append passes.
    open(daily_note_file_path, 'w').close()
    df.groupby('Channel').apply(add_notes_to_day_page)
# Script entry point: run the full export when invoked directly.
if __name__ == "__main__":
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment