# MIT License | |
# Copyright (c) 2016 Chandler Abraham | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# The above copyright notice and this permission notice shall be included in all | |
# copies or substantial portions of the Software. | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
# SOFTWARE. | |
import argparse
import json
import os

from slacker import Slacker
# This script finds all channels, private channels and direct messages
# that your user participates in, downloads the complete history for
# those conversations and writes each conversation out to separate json files.
# | |
# This user centric history gathering is nice because the official slack data exporter | |
# only exports public channels. | |
# | |
# PS, this only works if your slack team has a paid account which allows for unlimited history. | |
# | |
# PPS, this use of the API is blessed by Slack. | |
# https://get.slack.help/hc/en-us/articles/204897248 | |
# " If you want to export the contents of your own private groups and direct messages | |
# please see our API documentation." | |
# | |
# get your slack user token at the bottom of this page | |
# https://api.slack.com/web | |
# | |
# dependencies: | |
# pip install slacker # https://github.com/os/slacker | |
# | |
# usage examples | |
# python slack_history.py --token='123token' | |
# python slack_history.py --token='123token' --dryRun
# python slack_history.py --token='123token' --skipDirectMessages | |
# python slack_history.py --token='123token' --skipDirectMessages --skipPrivateChannels | |
# fetches the complete message history for a channel/group/im | |
# | |
# pageableObject could be: | |
# slack.channels
# slack.groups | |
# slack.im | |
# | |
# channelId is the id of the channel/group/im you want to download history for. | |
def getHistory(pageableObject, channelId, pageSize = 100):
    """Collect the full message history of one conversation, page by page.

    pageableObject -- object with a ``history(channel=, latest=, oldest=, count=)``
                      method whose result exposes a ``.body`` dict containing
                      'messages' and (optionally) 'has_more'.
    channelId      -- id of the channel/group/im to download.
    pageSize       -- number of messages requested per call.

    Returns a list with the messages of every page, in the order received.
    """
    messages = []
    lastTimestamp = None  # no upper bound on the first request

    while True:
        response = pageableObject.history(
            channel = channelId,
            latest = lastTimestamp,
            oldest = 0,
            count = pageSize
        ).body

        page = response['messages']
        messages.extend(page)

        # Stop on the last page. Also stop on an empty page: there would be
        # no new timestamp to continue from, so the same request would be
        # repeated forever (or messages[-1] would fail on an empty list).
        if not response.get('has_more') or not page:
            break

        # The next request continues from the last message of this page.
        # presumably pages arrive newest-first, making this the oldest
        # message seen so far -- TODO confirm against the Slack API docs
        lastTimestamp = page[-1]['ts']

    return messages
def mkdir(directory):
    """Create `directory` (and any missing parents) unless it already exists.

    Raises OSError if the directory cannot be created, including when the
    path already exists but is not a directory.
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Creating first and checking afterwards avoids the race between an
        # "exists?" test and the creation; an existing directory is fine.
        if not os.path.isdir(directory):
            raise
# fetch and write history for all public channels | |
def getChannels(slack, dryRun):
    """List all public channels and, unless dryRun, write each one's history.

    slack  -- client exposing ``channels.list()`` and ``channels.info(id)``
              whose results carry a ``.body`` dict.
    dryRun -- when true, only print the channel names; nothing is fetched
              or written.

    Each channel is written to ``channels/<name>.json`` as a dict with the
    keys 'channel_info' and 'messages'.
    """
    channels = slack.channels.list().body['channels']

    print("\nfound channels: ")
    for channel in channels:
        print(channel['name'])

    if dryRun:
        return

    parentDir = "channels"
    mkdir(parentDir)
    for channel in channels:
        print("getting history for channel {0}".format(channel['name']))
        fileName = "{parent}/{file}.json".format(parent = parentDir, file = channel['name'])
        messages = getHistory(slack.channels, channel['id'])
        channelInfo = slack.channels.info(channel['id']).body['channel']
        with open(fileName, 'w') as outFile:
            print("writing {0} records to {1}".format(len(messages), fileName))
            json.dump({'channel_info': channelInfo, 'messages': messages }, outFile, indent=4)
# fetch and write history for all direct message conversations | |
# also known as IMs in the slack API. | |
def getDirectMessages(slack, ownerId, userIdNameMap, dryRun):
    """List all 1:1 direct-message conversations and, unless dryRun, write their history.

    slack         -- client exposing ``im.list()`` whose result carries a
                     ``.body`` dict with an 'ims' list.
    ownerId       -- user id of the token owner; recorded as the second
                     member of every conversation.
    userIdNameMap -- dict mapping user id -> user name; ids missing from it
                     are shown as "<id> (name unknown)".
    dryRun        -- when true, only print the conversation partners.

    Each conversation is written to ``direct_messages/<name>.json`` as a
    dict with the keys 'channel_info' and 'messages'.
    """
    dms = slack.im.list().body['ims']

    # Resolve each partner's display name once; it is used both for the
    # listing and for the output file name.
    names = [userIdNameMap.get(dm['user'], dm['user'] + " (name unknown)") for dm in dms]

    print("\nfound direct messages (1:1) with the following users:")
    for name in names:
        print(name)

    if dryRun:
        return

    parentDir = "direct_messages"
    mkdir(parentDir)
    for dm, name in zip(dms, names):
        print("getting history for direct messages with {0}".format(name))
        fileName = "{parent}/{file}.json".format(parent = parentDir, file = name)
        messages = getHistory(slack.im, dm['id'])
        channelInfo = {'members': [dm['user'], ownerId]}
        with open(fileName, 'w') as outFile:
            print("writing {0} records to {1}".format(len(messages), fileName))
            json.dump({'channel_info': channelInfo, 'messages': messages}, outFile, indent=4)
# fetch and write history for all private channels | |
# also known as groups in the slack API. | |
def getPrivateChannels(slack, dryRun):
    """List all private channels (groups) and, unless dryRun, write each one's history.

    slack  -- client exposing ``groups.list()`` and ``groups.info(id)``
              whose results carry a ``.body`` dict.
    dryRun -- when true, only print each group's name and member count.

    Each group is written to ``private_channels/<name>.json`` as a dict
    with the keys 'channel_info' and 'messages'.
    """
    groups = slack.groups.list().body['groups']

    print("\nfound private channels:")
    for group in groups:
        print("{0}: ({1} members)".format(group['name'], len(group['members'])))

    if dryRun:
        return

    parentDir = "private_channels"
    mkdir(parentDir)
    for group in groups:
        print("getting history for private channel {0} with id {1}".format(group['name'], group['id']))
        fileName = "{parent}/{file}.json".format(parent = parentDir, file = group['name'])
        messages = getHistory(slack.groups, group['id'])
        channelInfo = slack.groups.info(group['id']).body['group']
        with open(fileName, 'w') as outFile:
            print("writing {0} records to {1}".format(len(messages), fileName))
            json.dump({'channel_info': channelInfo, 'messages': messages}, outFile, indent=4)
# fetch all users in the slack team and return a map userId -> userName
def getUserMap(slack):
    """Return a dict mapping user id -> user name for every listed user.

    slack -- client exposing ``users.list()`` whose result carries a
             ``.body`` dict with a 'members' list; each member needs the
             keys 'id' and 'name'.
    """
    # get all users in the slack organization
    users = slack.users.list().body['members']
    userIdNameMap = {user['id']: user['name'] for user in users}
    print("found {0} users ".format(len(users)))
    return userIdNameMap
# get basic info about the slack team and user to ensure the authentication token works
def doTestAuth(slack):
    """Call ``auth.test`` and report which team and user the token belongs to.

    slack -- client exposing ``auth.test()`` whose result carries a
             ``.body`` dict with at least the keys 'team' and 'user'.

    Returns the unmodified ``auth.test`` body so callers can read further
    fields from it.
    """
    testAuth = slack.auth.test().body
    teamName = testAuth['team']
    currentUser = testAuth['user']
    print("Successfully authenticated for team {0} and user {1} ".format(teamName, currentUser))
    return testAuth
def main(argv = None):
    """Parse the command line, authenticate, then export the selected conversation types.

    argv -- list of argument strings; None lets argparse read the process
            command line.

    Unless --dryRun is given, writes ``metadata.json`` (auth info and the
    user id -> name map) before exporting channels, private channels and
    direct messages, each of which can be skipped with its own flag.
    """
    parser = argparse.ArgumentParser(description='download slack history')

    # Required: nothing below can work without a token, so fail at parse
    # time with a usage message instead of at the first API call.
    parser.add_argument('--token', required=True, help="an api token for a slack user")

    parser.add_argument(
        '--dryRun',
        action='store_true',
        default=False,
        help="if dryRun is true, don't fetch/write history only get channel names")

    parser.add_argument(
        '--skipPrivateChannels',
        action='store_true',
        default=False,
        help="skip fetching history for private channels")

    parser.add_argument(
        '--skipChannels',
        action='store_true',
        default=False,
        help="skip fetching history for channels")

    parser.add_argument(
        '--skipDirectMessages',
        action='store_true',
        default=False,
        help="skip fetching history for directMessages")

    args = parser.parse_args(argv)

    # Some shells pass the quotes in --token='...' through literally, and
    # users have reported authentication failing until the quotes were
    # removed; drop any surrounding quote characters.
    slack = Slacker(args.token.strip('\'"'))

    testAuth = doTestAuth(slack)

    userIdNameMap = getUserMap(slack)

    dryRun = args.dryRun

    if not dryRun:
        with open('metadata.json', 'w') as outFile:
            print("writing metadata")
            metadata = {
                'auth_info': testAuth,
                'users': userIdNameMap
            }
            json.dump(metadata, outFile, indent=4)

    if not args.skipChannels:
        getChannels(slack, dryRun)

    if not args.skipPrivateChannels:
        getPrivateChannels(slack, dryRun)

    if not args.skipDirectMessages:
        getDirectMessages(slack, testAuth['user_id'], userIdNameMap, dryRun)


if __name__ == "__main__":
    main()
This comment has been minimized.
This comment has been minimized.
Could it be possible to download shared files and snippets too? |
This comment has been minimized.
This comment has been minimized.
Just letting anyone who's interested know that I've made modifications on my fork so the export format more closely resembles that of a typical slack export (i.e. channel data is stored in individual files by date). However, I haven't yet made any changes to the overall directory tree, which I will be doing next to get it to match a slack export. I've converted my fork to a repo, to better keep track of commit changes: https://github.com/minniel/slack_history I'll be specifically making changes to ensure that it functions with this viewer: https://gist.github.com/levelsio/122907e95956602e5c09- @github3332 this is relevant to your earlier comment. |
This comment has been minimized.
This comment has been minimized.
I'm not sure if this works with the latest version of slacker. I'm getting
I'm using my bot token and I can call the channel history manually. edit:
omg so sorry, I had ' ' wrapped around my token... |
This comment has been minimized.
This comment has been minimized.
Both of your links came up dead: https://github.com/minniel/slack_history |
This comment has been minimized.
This comment has been minimized.
I need to use slack2html, but I only have the old format where it's all one big giant file. I can't access the logs as the original team has been dropped to free status.
This comment has been minimized.
This comment has been minimized.
you can get both gists here: https://gist.github.com/minniel/ |
This comment has been minimized.
This comment has been minimized.
Thanks for this! Any thoughts on adding extra arguments that allow you to specify channel ID's to include / exclude from the output? |
This comment has been minimized.
This comment has been minimized.
Thanks for making this available! It helped me out today :D FYI for others, this requires using the "legacy Slack API tokens" https://api.slack.com/custom-integrations/legacy-tokens . |
This comment has been minimized.
This comment has been minimized.
Hi there, |
This comment has been minimized.
This comment has been minimized.
Hey, does this include exporting reactions along with the messages? |
This comment has been minimized.
This comment has been minimized.
@Benoit99 I have a hacked fix for that here, https://gist.github.com/f0ster/ab3b8bc748c0779a53ceed11d46b1303 . It basically has a configured wait/backoff and will keep retrying as many times as you specify (wrapped around each call to slack /getHistory) Here it is set to try every 2 seconds up to 10000x for each URI resource it fails on. The slack docs say they rate limit to 1req/sec |
This comment has been minimized.
This comment has been minimized.
The script also has a |
This comment has been minimized.
This comment has been minimized.
What is the syntax to export a specific channel? I don't need all of the messages from my company's entire slack account to be exported, just a single channel.
This comment has been minimized.
This comment has been minimized.
The token is the client secret or the verification token? I get |
This comment has been minimized.
This comment has been minimized.
for anyone having slacker.Error: invalid_auth please try without the '' around your token. python slack_history.py --token=my-secret-token It worked for me. |
This comment has been minimized.
This comment has been minimized.
Is there anywhere any tool that will help me physically read the exported files from this program? I have found this: https://github.com/hfaran/slack-export-viewer |
This comment has been minimized.
This comment has been minimized.
|
This comment has been minimized.
This comment has been minimized.
Hey Guys! I know that most people here wanted to export the data and view it in the slack export viewer. I have updated this basic implementation to support some more features such as exporting only specific private channels and properly exporting to work with in slack export viewer. I created a repo that can be expanded upon. I tested most of the features but it may have issues still. Feel free to create issues so others know and I might fix them. https://github.com/zach-snell/slack-export Enjoy |
This comment has been minimized.
This comment has been minimized.
Thank you, so far so good. I got the This script lists all the messages in a single file, whereas your tool lists them by folder then date. I guess that is the slack export format and I should use the slack export viewer? Update: Yes. That's right. I was able to use @zach-snell's tool without encountering
After it's done, go into the directory and zip the file, then view in the viewer.
cd 20190107-164818-slack_export/
zip -r ../20190107-164818-slack_export.zip *
cd ..
slack-export-viewer -z 20190107-164818-slack_export.zip This is because |
This comment has been minimized.
This comment has been minimized.
@sunapi386 see minniel/slack_history#2 for fixing the 429 error. |
This comment has been minimized.
This comment has been minimized.
Hey Everyone, I've taken this code and updated it to download messages directly into Excel files on your local computer. Check it out! https://gist.github.com/nickcanfield29/7e3cef8f3ca58d3dfde205d2bb17bf02
This comment has been minimized.
This comment has been minimized.
Hi, e return this error. I don't quite understand, I'm a little lost in mistakes. |
This comment has been minimized.
This comment has been minimized.
The missing scope error is due to "incomplete" permissions via OAuth. I just got bored and added the entire suite of permissions. But now I get another error :(
|
This comment has been minimized.
This comment has been minimized.
Use this code below. I've used new methods that are not deprecated.
|
This comment has been minimized.
This comment has been minimized.
It seems that Slack has disabled ways to create tokens as of May 2020. Since the script depends on having a token in its argument, is there any way for the script to work without a token?
This comment has been minimized.
This comment has been minimized.
Yes. You need to create a slack app. Then you can get your token. Just be
sure to include the right scopes. If you'd like, I can help you with that.
…On Wed, Sep 16, 2020, 8:11 PM kunalvaidya ***@***.***> wrote:
***@***.**** commented on this gist.
------------------------------
it seems that slack has disabled ways to create tokens now may 2020, Since
the script depends on having token in its argument , is there any way for
the script to work without token ?
https://api.slack.com/legacy/custom-integrations/legacy-tokens
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<https://gist.github.com/fb7a070f52883849de35#gistcomment-3456237>, or
unsubscribe
<https://github.com/notifications/unsubscribe-auth/AG5VVY3CWOBRRGAEIBPXT7TSGC2PRANCNFSM4MEOB2QQ>
.
|
This comment has been minimized.
This comment has been minimized.
Thanks, @nickcanfield29 for guidance. I tried the Evernote clip method but I don't have the paid version of Evernote so in one direct message itself I reached the limit. |
This comment has been minimized.
This comment has been minimized.
the link does not work |
This comment has been minimized.
This comment has been minimized.
Does anyone know if using this script on a free account wouldn't just download the last 10,000 messages as opposed to the entire history? |
This comment has been minimized.
Hi
A great script that works for end users. Could you, however, tweak your script so that its output resembles a standard slack export file? The reason I ask is that the following useful utilities for rendering these json files could then be used on dumps from your script.
Any help guys.. some newbie instructions would be great
Thanks