Skip to content

Instantly share code, notes, and snippets.

@dkapila
Last active October 1, 2021 21:22
Show Gist options
  • Save dkapila/479232696644bf6e8711bc6e2a752174 to your computer and use it in GitHub Desktop.
Prepare Slack Export for Roam/Logseq/Obsidian
import pandas as pd
import json
import glob
import os
from pathlib import Path
import re
import datetime
from datetime import timezone
import pytz
# --- Configuration ---------------------------------------------------------
SLACK_EXPORT_FROM_PATH = './slack_export'  # root folder of the unzipped Slack export
SLACK_EXPORT_TO_PATH = './output'  # destination folder for the generated .md files
USE_CLOZE_SYNTAX_FOR_USERNAMES = False  # if True, render usernames as Roam cloze: {{=: @name | Real Name}}
USE_CARDINAL_DATES = False  # if True, name files like "October 1st, 2021" instead of "2021_10_01"
# --- Module state (filled in by run()) -------------------------------------
combined_data = []  # rows of [date-string, channel-name, message-list], one per daily JSON file
user_names = {}  # user id -> {'name': ..., 'real_name': ...}, built by set_user_names()
df_copy = pd.DataFrame(columns=['Date', 'Channel', 'Data'])  # global frame queried by get_replies()
## List of available timezones: https://stackoverflow.com/questions/13866926/is-there-a-list-of-pytz-timezones
slack_time_zone = pytz.timezone('America/Los_Angeles')  # timezone used to date-bucket message timestamps
user_ids_to_remove = {}  # user ids whose messages/replies are skipped (e.g. bots)
def run():
    """Drive the full export: read channel/user metadata, aggregate every
    daily message file, then write one markdown page per date.

    Side effects: populates the module globals ``combined_data``,
    ``user_names`` and ``df_copy``, and writes .md files under
    SLACK_EXPORT_TO_PATH.
    """
    print("Getting Channel names...")
    channel_names = get_channel_names()
    print("Getting User names...")
    set_user_names()
    print("Preparing Data...")
    for channel in channel_names:  # idx from the original enumerate was unused
        add_daily_comments_from_channel(channel)
    print("Creating Markdown Pages...")
    global df_copy
    df_copy = pd.DataFrame(combined_data, columns=['Date', 'Channel', 'Data'])
    # The original built a second, identical DataFrame just to group it;
    # grouping the shared frame directly avoids the redundant copy.
    df_copy.groupby('Date').apply(buildDailyPages)
    print("Export complete. All markdown files are in " + SLACK_EXPORT_TO_PATH)
def get_channel_names():
    """Return the list of channel names from channels.json in the Slack export.

    Returns:
        list[str]: the 'name' field of every channel entry.
    """
    json_file_path = SLACK_EXPORT_FROM_PATH + '/channels.json'
    with open(json_file_path, 'r') as j:
        contents = json.load(j)  # json.load reads the file handle directly
    return [channel['name'] for channel in contents]
def set_user_names():
    """Populate the global ``user_names`` map (user id -> name/real_name)
    from users.json in the Slack export.

    Side effects: mutates the module-level ``user_names`` dict.
    """
    json_file_path = SLACK_EXPORT_FROM_PATH + '/users.json'
    with open(json_file_path, 'r') as j:
        contents = json.load(j)
    for user in contents:
        # 'real_name' is optional in Slack exports; fall back to ''.
        user_names[user['id']] = {
            'name': user['name'],
            'real_name': user.get('real_name', ''),
        }
def add_daily_comments_from_channel(channel):
    """Append one [date, channel, messages] row to ``combined_data`` for
    every daily JSON file in the channel's export folder.

    Args:
        channel: channel folder name inside SLACK_EXPORT_FROM_PATH.
    Side effects: mutates the module-level ``combined_data`` list.
    """
    files_in_channel = glob.glob(SLACK_EXPORT_FROM_PATH + "/" + channel + "/*.json")
    for file_path in files_in_channel:  # idx from the original enumerate was unused
        with open(file_path, 'r') as j:
            # Slack names each daily file YYYY-MM-DD.json; the stem is the date key.
            file_name = Path(file_path).stem
            contents = json.load(j)
        combined_data.append([file_name, channel, contents])
def clean_up_text(text, user_id):
    """Prefix a message with its author and expand raw <@U…> mentions.

    Args:
        text: raw Slack message text.
        user_id: Slack id of the message author.
    Returns:
        A single-line string like ``name(Real Name) : message text`` (or the
        cloze form when USE_CLOZE_SYNTAX_FOR_USERNAMES is set). Newlines are
        stripped so each message stays on one outline bullet.
    """
    user_name_text = ''
    if user_id in user_names:
        user_name = user_names[user_id]['name']
        real_name = user_names[user_id]['real_name']
        if USE_CLOZE_SYNTAX_FOR_USERNAMES:
            user_name_text = "{{=: @" + user_name + " | " + real_name + "}} : "
        else:
            user_name_text = user_name + "(" + real_name + ") : "
    # Expand mention tokens like <@U123ABC> embedded in the body text.
    # Locals are named mention_* to avoid shadowing the user_id parameter,
    # which the original loop overwrote.
    for mention in re.findall(r"\<@U([A-Za-z0-9_]+)\>", text):
        replacement = ''  # unknown users: strip the mention token entirely
        mention_id = 'U' + mention
        if mention_id in user_names:
            mention_name = user_names[mention_id]['name']
            mention_real = user_names[mention_id]['real_name']
            if USE_CLOZE_SYNTAX_FOR_USERNAMES:
                replacement = "{{=: @" + mention_name + " | " + mention_real + "}}"
            else:
                # Bug fix: the non-cloze branch appended a trailing " : " to
                # inline mentions (copy-paste from the author prefix); a
                # mention inside a sentence should not carry the colon, and
                # the cloze branch never did.
                replacement = mention_name + "(" + mention_real + ")"
        text = text.replace('<@' + mention_id + '>', replacement)
    return user_name_text + text.strip().replace('\n', '')
def get_replies(replies, channel_name):
    """Resolve thread-reply stubs to their full message dicts.

    Slack stores only ``{'user', 'ts'}`` stubs under a parent message's
    'replies' key; the full reply bodies live in that day's message list.
    This looks each stub up in the global ``df_copy`` frame by date and
    channel, then matches on the (ts, user) pair.

    Args:
        replies: list of reply stubs, each with 'ts' and 'user'.
        channel_name: channel the parent message belongs to.
    Returns:
        list of full message dicts, skipping users in ``user_ids_to_remove``.
    """
    replies_on_post = []
    for reply in replies:
        ts = reply['ts']
        # Slack 'ts' is epoch seconds; bucket it to a date in the export's timezone.
        date_ts = datetime.datetime.fromtimestamp(float(ts), tz=slack_time_zone).strftime("%Y-%m-%d")
        user = reply['user']
        if user in user_ids_to_remove:
            continue
        # Narrow the global frame to this date, then this channel.
        df = df_copy[(df_copy['Date'] == date_ts)]
        json_arr = df[(df['Channel'] == channel_name)]['Data']
        if (json_arr.empty):
            continue
        # json_arr.iloc[0] is that day's raw message list for the channel;
        # a reply is identified by its exact (ts, user) pair.
        for message in json_arr.iloc[0]:
            if ('ts' in message and 'user' in message and message['ts'] == ts and message['user'] == user):
                replies_on_post.append(message)
    return replies_on_post
def add_blocks(daily_note_file_path, channel_name, data):
    """Append one channel's section to a daily markdown file.

    Writes a ``- [[channel]]`` heading bullet, then one indented bullet per
    top-level message, then bullets for any thread replies.

    Args:
        daily_note_file_path: path of the day's .md file (opened in append mode).
        channel_name: channel whose messages are being written.
        data: that day's raw message list for the channel.
    """
    with open(daily_note_file_path, 'a') as file:
        file.write('- ' + '[[' + channel_name + ']]' + '\n')
        for block in data:
            # Skip thread replies (they are emitted under their parent),
            # system messages (joins, etc.), and entries with no author.
            if('parent_user_id' in block) or ('subtype' in block) or ('user' not in block):
                continue
            user_id = block['user']
            if user_id in user_ids_to_remove:
                continue
            text = clean_up_text(block['text'], block['user'])
            spacing = ' - '
            file.write(spacing + text + '\n')
            if ('replies' in block):
                # NOTE(review): both spacing values read ' - ' here, making the
                # reassignment a no-op; the original likely used a deeper indent
                # for replies and the in-string whitespace may have been lost in
                # transcription — verify against the upstream gist.
                spacing = ' - '
                replies = get_replies(block['replies'], channel_name)
                for reply_block in replies:
                    text = clean_up_text(reply_block['text'], reply_block['user'])
                    file.write(spacing + text + '\n')
def make_ordinal(n):
    """Return *n* as an ordinal string, e.g. 3 -> '3rd', 12 -> '12th'.

    Accepts anything int() can convert (int or numeric string).
    """
    n = int(n)
    # 11th, 12th, 13th are irregular: the teens always take 'th'.
    if 11 <= n % 100 <= 13:
        return str(n) + 'th'
    irregular = {1: 'st', 2: 'nd', 3: 'rd'}
    return str(n) + irregular.get(n % 10, 'th')
def get_roam_date_page_from_date(date):
    """Convert an ISO date into a Roam-style page title.

    Args:
        date: 'YYYY-MM-DD' string.
    Returns:
        e.g. '2021-10-01' -> 'October 1st, 2021'.
    """
    date_time_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
    full_month_name = date_time_obj.strftime("%B")
    year = date_time_obj.strftime("%Y")
    # .day is already an unpadded int; the original reconstructed it from
    # strftime(" %d") with fragile replace() calls to strip zero padding.
    day = make_ordinal(date_time_obj.day)
    return full_month_name + " " + day + ", " + year
def get_date_string(date):
    """Map an ISO 'YYYY-MM-DD' date to the form used for output filenames:
    a Roam cardinal page title when USE_CARDINAL_DATES is set, otherwise
    the underscore form 'YYYY_MM_DD'."""
    if USE_CARDINAL_DATES:
        return get_roam_date_page_from_date(date)
    return date.replace("-", "_")
def add_notes_to_day_page(df):
    """Append one channel's messages for a single day to that day's markdown file.

    Called via groupby('Channel').apply, so every row of *df* shares the same
    channel and date; the first row supplies both plus the raw message list.

    Args:
        df: single-group DataFrame with 'Date', 'Channel', 'Data' columns.
    """
    channel_name = df['Channel'].iloc[0]
    date = get_date_string(df['Date'].iloc[0])
    data = df['Data'].iloc[0]
    daily_note_file_path = SLACK_EXPORT_TO_PATH + '/' + date + '.md'
    # add_blocks opens the file itself; the original wrapped this call in a
    # second open() whose handle was never used.
    add_blocks(daily_note_file_path, channel_name, data)
def buildDailyPages(df):
    """Create the markdown file for one date, then fill it channel by channel.

    Called via groupby('Date').apply; *df* holds every channel's rows for a
    single date. The file is truncated first because add_notes_to_day_page
    (via add_blocks) appends to it.
    """
    date = get_date_string(df['Date'].iloc[0])
    print("Creating file: " + date + ".md")
    daily_note_file_path = SLACK_EXPORT_TO_PATH + '/' + date + '.md'
    # Truncate any previous export for this day before the append passes.
    open(daily_note_file_path, 'w').close()
    df.groupby('Channel').apply(add_notes_to_day_page)
# Script entry point: run the full export when invoked directly.
if __name__ == "__main__":
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment