Created
March 8, 2020 07:19
-
-
Save euske/1ca1a15f156d0a35334fa04bf7f38d51 to your computer and use it in GitHub Desktop.
Download Slack channel histories.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
#  Download Slack channel histories.
#
#  Prerequisites:
#    1. Create a Slack app.
#    2. Give the app the following OAuth scopes (under "OAuth & Permissions"):
#         channels:read, channels:history, users.profile:read
#    3. Install the app to the workspace and copy its OAuth token:
#         "xoxp-..."
#    4. $ pip install slackclient --user
#
#  How to Use:
#    $ python download_slack.py -O output-dir xoxp-your-token
#
import sys | |
import os | |
import json | |
from slack import WebClient | |
from slack.errors import SlackApiError | |
def _require(response, key):
    """Return response[key], raising RuntimeError if the key is absent."""
    value = response.get(key)
    if value is None:
        raise RuntimeError(f'Slack response is missing {key!r}')
    return value


def _iter_pages(method, **params):
    """Yield every page of a cursor-paginated Slack Web API method.

    The first request is sent without a cursor; subsequent requests pass
    the `next_cursor` from the previous page's `response_metadata`.
    Iteration stops as soon as that cursor is missing or empty.
    """
    cursor = None
    while True:
        if cursor:
            page = method(cursor=cursor, **params)
        else:
            page = method(**params)
        yield page
        metadata = page.get('response_metadata') or {}
        cursor = metadata.get('next_cursor')
        if not cursor:
            return


def download_slack(token, outdir):
    """Export every non-archived channel visible to `token` as JSON.

    For each channel, a file `<outdir>/<channel_id>.json` is written that
    holds the channel object, a mapping of member user IDs to their
    profiles, and the list of messages returned by the history API.

    Args:
        token: Slack OAuth token used to construct the WebClient.
        outdir: Existing directory that receives the JSON files.

    Raises:
        RuntimeError: if a Slack response lacks an expected field.
        SlackApiError: if listing channels, listing members or fetching
            a profile fails. A failure while fetching message history is
            reported and the messages collected so far are still saved.
    """
    client = WebClient(token=token)
    # Profiles are shared across channels, so fetch each user only once.
    profiles_cache = {}

    print('Getting the channel list...')
    channels = []
    for page in _iter_pages(client.conversations_list):
        channels.extend(_require(page, 'channels'))

    for channel in channels:
        if not channel.get('is_channel'):
            continue
        if channel.get('is_archived'):
            continue
        channel_id = channel.get('id')

        sys.stdout.write(f'Getting the members in {channel_id}.')
        sys.stdout.flush()
        member_ids = []
        for page in _iter_pages(client.conversations_members,
                                channel=channel_id):
            member_ids.extend(_require(page, 'members'))
        members = {}
        for user_id in member_ids:
            profile = profiles_cache.get(user_id)
            if profile is None:
                obj = client.users_profile_get(user=user_id)
                profile = _require(obj, 'profile')
                profiles_cache[user_id] = profile
                sys.stdout.write('.')
                sys.stdout.flush()
            members[user_id] = profile
        sys.stdout.write('\n')

        sys.stdout.write(f'Getting the messages in {channel_id}.')
        sys.stdout.flush()
        messages_all = []
        try:
            for page in _iter_pages(client.conversations_history,
                                    channel=channel_id):
                sys.stdout.write('.')
                sys.stdout.flush()
                messages_all.extend(_require(page, 'messages'))
        except SlackApiError as e:
            # Give up on this channel's history instead of retrying the
            # same failing request forever; keep what was fetched so far.
            print(channel, e)
        sys.stdout.write('\n')

        data = {
            'channel': channel,
            'members': members,
            'messages': messages_all,
        }
        path = os.path.join(outdir, f'{channel_id}.json')
        print(f'Exporting {len(messages_all)} messages to {path}...')
        with open(path, 'w') as fp:
            json.dump(data, fp)
def main(argv):
    """Command-line entry point.

    Usage: prog [-d] [-O outdir] token ...

    Args:
        argv: Full argument vector, including the program name at index 0.

    Returns:
        100 if the options are invalid or no token was supplied,
        otherwise None after every token has been processed.
    """
    import getopt

    def usage():
        print('usage: %s [-d] [-O outdir] token ...' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dO:')
    except getopt.GetoptError:
        return usage()
    # At least one token is required; without it there is nothing to do.
    if not args:
        return usage()

    outdir = '.'
    for (k, v) in opts:
        # '-d' is still accepted for compatibility but has no effect.
        if k == '-O':
            outdir = v

    # Tolerate an existing directory, but surface real failures
    # (e.g. permission denied) here rather than at the first write.
    os.makedirs(outdir, exist_ok=True)

    for token in args:
        download_slack(token, outdir)
    return
# Run main() only when executed as a script; its return value becomes
# the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello - thank you very much for this gist. At this point, the package is 'slack_sdk', but honestly this has saved hours' worth of time. We now have a better understanding of the API. Much appreciated.