Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Download Slack Channel/PrivateChannel/DirectMessage History
# MIT License
# Copyright (c) 2016 Chandler Abraham
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from slacker import Slacker
import json
import argparse
import os
# This script finds all channels, private channels and direct messages
# that your user participates in, downloads the complete history for
# those conversations and writes each conversation out to separate json files.
#
# This user centric history gathering is nice because the official slack data exporter
# only exports public channels.
#
# PS, this only works if your slack team has a paid account which allows for unlimited history.
#
# PPS, this use of the API is blessed by Slack.
# https://get.slack.help/hc/en-us/articles/204897248
# " If you want to export the contents of your own private groups and direct messages
# please see our API documentation."
#
# get your slack user token at the bottom of this page
# https://api.slack.com/web
#
# dependencies:
# pip install slacker # https://github.com/os/slacker
#
# usage examples
# python slack_history.py --token='123token'
# python slack_history.py --token='123token' --dryRun
# python slack_history.py --token='123token' --skipDirectMessages
# python slack_history.py --token='123token' --skipDirectMessages --skipPrivateChannels
# fetches the complete message history for a channel/group/im
#
# pageableObject could be:
# slack.channels
# slack.groups
# slack.im
#
# channelId is the id of the channel/group/im you want to download history for.
def getHistory(pageableObject, channelId, pageSize = 100):
    """Fetch the complete message history for a channel/group/im.

    Repeatedly calls ``pageableObject.history`` and accumulates the
    ``messages`` list of every response until the response no longer reports
    ``has_more``.

    Args:
        pageableObject: object exposing
            ``history(channel=, latest=, oldest=, count=)`` whose result has
            a ``body`` mapping (e.g. ``slack.channels``, ``slack.groups`` or
            ``slack.im``).
        channelId: id of the channel/group/im to download history for.
        pageSize: number of messages requested per call.

    Returns:
        A list with the messages of every page, in the order received.
    """
    messages = []
    lastTimestamp = None

    while True:
        response = pageableObject.history(
            channel=channelId,
            latest=lastTimestamp,
            oldest=0,
            count=pageSize,
        ).body

        page = response['messages']
        messages.extend(page)

        # An empty page gives no new timestamp to continue from: asking again
        # would repeat the same request forever (or fail on the very first
        # page), so stop even if the server still claims there is more.
        if not page or not response.get('has_more'):
            break

        # Continue from the last message received so far.
        # NOTE(review): presumably pages are ordered newest-first, making
        # this the oldest timestamp seen - confirm against the Slack API.
        lastTimestamp = page[-1]['ts']

    return messages
def mkdir(directory):
    """Create `directory`, including missing parents, if it does not exist.

    Creating first and tolerating "already exists" avoids the window between
    an existence check and the creation in which another process could
    create the same path.

    Raises:
        OSError: if the directory cannot be created, including when the path
            exists but is not a directory.
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Only an already-existing directory is acceptable; anything else
        # (permissions, a regular file in the way, ...) is a real error.
        if not os.path.isdir(directory):
            raise
# fetch and write history for all public channels
def getChannels(slack, dryRun):
    """List all public channels and, unless dryRun, dump each one to JSON.

    Every channel name returned by ``slack.channels.list()`` is printed.
    When ``dryRun`` is false, a ``channels`` directory is created and one
    ``<name>.json`` file per channel is written, holding the channel info
    and its full message history.
    """
    publicChannels = slack.channels.list().body['channels']

    print("\nfound channels: ")
    for entry in publicChannels:
        print(entry['name'])

    if dryRun:
        return

    parentDir = "channels"
    mkdir(parentDir)

    for entry in publicChannels:
        channelName = entry['name']
        channelId = entry['id']

        print("getting history for channel {0}".format(channelName))
        fileName = "{parent}/{file}.json".format(parent=parentDir, file=channelName)

        history = getHistory(slack.channels, channelId)
        info = slack.channels.info(channelId).body['channel']

        with open(fileName, 'w') as outFile:
            print("writing {0} records to {1}".format(len(history), fileName))
            payload = {'channel_info': info, 'messages': history}
            json.dump(payload, outFile, indent=4)
# fetch and write history for all direct message conversations
# also known as IMs in the slack API.
def getDirectMessages(slack, ownerId, userIdNameMap, dryRun):
    """List all 1:1 direct-message conversations and, unless dryRun, dump them.

    Each conversation returned by ``slack.im.list()`` is printed using the
    other user's name from ``userIdNameMap`` (falling back to the raw user id
    plus " (name unknown)"). When ``dryRun`` is false, a ``direct_messages``
    directory is created and one ``<name>.json`` file per conversation is
    written, holding the two member ids and the full message history.
    """
    def partnerName(conversation):
        # Display name of the other participant, or a marked raw id.
        return userIdNameMap.get(
            conversation['user'],
            conversation['user'] + " (name unknown)")

    conversations = slack.im.list().body['ims']

    print("\nfound direct messages (1:1) with the following users:")
    for conversation in conversations:
        print(partnerName(conversation))

    if dryRun:
        return

    parentDir = "direct_messages"
    mkdir(parentDir)

    for conversation in conversations:
        name = partnerName(conversation)
        print("getting history for direct messages with {0}".format(name))
        fileName = "{parent}/{file}.json".format(parent=parentDir, file=name)

        history = getHistory(slack.im, conversation['id'])
        info = {'members': [conversation['user'], ownerId]}

        with open(fileName, 'w') as outFile:
            print("writing {0} records to {1}".format(len(history), fileName))
            payload = {'channel_info': info, 'messages': history}
            json.dump(payload, outFile, indent=4)
# fetch and write history for all private channels
# also known as groups in the slack API.
def getPrivateChannels(slack, dryRun):
    """List all private channels and, unless dryRun, dump each one to JSON.

    Private channels are called "groups" in the Slack API. Every group
    returned by ``slack.groups.list()`` is printed with its member count.
    When ``dryRun`` is false, a ``private_channels`` directory is created and
    one ``<name>.json`` file per group is written, holding the group info and
    its full message history.
    """
    groups = slack.groups.list().body['groups']

    print("\nfound private channels:")
    for group in groups:
        print("{0}: ({1} members)".format(group['name'], len(group['members'])))

    if dryRun:
        return

    parentDir = "private_channels"
    mkdir(parentDir)

    for group in groups:
        print("getting history for private channel {0} with id {1}".format(group['name'], group['id']))
        fileName = "{parent}/{file}.json".format(parent=parentDir, file=group['name'])

        messages = getHistory(slack.groups, group['id'])
        channelInfo = slack.groups.info(group['id']).body['group']

        with open(fileName, 'w') as outFile:
            print("writing {0} records to {1}".format(len(messages), fileName))
            json.dump({'channel_info': channelInfo, 'messages': messages}, outFile, indent=4)
# fetch all users of the slack team and return a map userId -> userName
def getUserMap(slack):
    """Return a dict mapping user id -> user name.

    The users are the ``members`` returned by ``slack.users.list()``; the
    number of users found is printed as a side effect.
    """
    users = slack.users.list().body['members']
    userIdNameMap = {user['id']: user['name'] for user in users}
    print("found {0} users ".format(len(users)))
    return userIdNameMap
# get basic info about the slack team to ensure the authentication token works
def doTestAuth(slack):
    """Call ``slack.auth.test()`` and return its response body.

    Prints the ``team`` and ``user`` fields of the response as confirmation
    that the call succeeded.
    """
    authInfo = slack.auth.test().body
    confirmation = "Successfully authenticated for team {0} and user {1} "
    print(confirmation.format(authInfo['team'], authInfo['user']))
    return authInfo
if __name__ == "__main__":
    # Command line entry point: authenticate, write metadata.json, then export
    # public channels, private channels and direct messages unless skipped.
    parser = argparse.ArgumentParser(description='download slack history')

    # Without a token the first API call can only fail, so require it up
    # front and let argparse print a usage error instead.
    parser.add_argument(
        '--token',
        required=True,
        help="an api token for a slack user")

    parser.add_argument(
        '--dryRun',
        action='store_true',
        default=False,
        help="if dryRun is true, don't fetch/write history only get channel names")

    parser.add_argument(
        '--skipPrivateChannels',
        action='store_true',
        default=False,
        help="skip fetching history for private channels")

    parser.add_argument(
        '--skipChannels',
        action='store_true',
        default=False,
        help="skip fetching history for channels")

    parser.add_argument(
        '--skipDirectMessages',
        action='store_true',
        default=False,
        help="skip fetching history for directMessages")

    args = parser.parse_args()

    # Some shells pass quote characters through literally (--token='abc'
    # arrives as 'abc' including the quotes); strip them so the token is
    # sent to Slack as intended.
    token = args.token.strip('\'"')

    slack = Slacker(token)

    testAuth = doTestAuth(slack)
    userIdNameMap = getUserMap(slack)

    dryRun = args.dryRun

    if not dryRun:
        # Record who ran the export and the id -> name map used for DM files.
        with open('metadata.json', 'w') as outFile:
            print("writing metadata")
            metadata = {
                'auth_info': testAuth,
                'users': userIdNameMap
            }
            json.dump(metadata, outFile, indent=4)

    if not args.skipChannels:
        getChannels(slack, dryRun)

    if not args.skipPrivateChannels:
        getPrivateChannels(slack, dryRun)

    if not args.skipDirectMessages:
        getDirectMessages(slack, testAuth['user_id'], userIdNameMap, dryRun)

Hi

A great script that works for end users. Could you, however, tweak your script so that its output resembles a standard Slack export file? The reason I ask is that the following useful utilities for rendering these JSON files could then be used on dumps from your script.

  1. https://gist.github.com/levelsio/122907e95956602e5c09- Requires php knowledge also assumes a standard slack zip export
  2. https://github.com/hfaran/slack-export-viewer - Probably provides the desired functionality, but also assumes standard slack zip export
  3. https://gist.github.com/jordanmkoncz/0ce0ce11a3359209f48949eefee945ce - This tool requires import from slack-history-export tool, which I find unable to run (See #6)

Any help guys.. some newbie instructions would be great

Thanks

piranna commented Oct 31, 2016

Could it be possible to download shared files and snippets too?

minniel commented Nov 11, 2016

Just letting anyone who's interested know that I've made modifications on my fork so the export format more closely resembles that of a typical slack export (i.e. channel data is stored in individual files by date). However, I haven't yet made any changes to the overall directory tree, which I will be doing next to get it to match a slack export. I've converted my fork to a repo, to better keep track of commit changes: https://github.com/minniel/slack_history

I'll be specifically making changes to ensure that it functions with this viewer: https://gist.github.com/levelsio/122907e95956602e5c09-

@github3332 this is relevant to your earlier comment.

shadycuz commented Dec 20, 2016

I'm not sure if this works with the latest version of slacker. I'm getting

 Traceback (most recent call last):
  File "slack_history.py", line 182, in <module>
    testAuth = doTestAuth(slack)
  File "slack_history.py", line 143, in doTestAuth
    testAuth = slack.auth.test().body
  File "C:\Users\lblaney_c\AppData\Local\Programs\Python\Python35-32\lib\site-pa
ckages\slacker\__init__.py", line 84, in test
    return self.get('auth.test')
  File "C:\Users\lblaney_c\AppData\Local\Programs\Python\Python35-32\lib\site-pa
ckages\slacker\__init__.py", line 68, in get
    return self._request(requests.get, api, **kwargs)
  File "C:\Users\lblaney_c\AppData\Local\Programs\Python\Python35-32\lib\site-pa
ckages\slacker\__init__.py", line 63, in _request
    raise Error(response.error)
slacker.Error: invalid_auth

I'm using my bot token and I can call the channel history manually.

edit:

usage examples

python slack_history.py --token='123token'

python slack_history.py --token='123token' --dryRun=True

python slack_history.py --token='123token' --skipDirectMessages

python slack_history.py --token='123token' --skipDirectMessages --skipPrivateChannels

omg so sorry, I had ' ' wrapped around my token...

I need to use slack2html but I only have the old format where its all one big giant file. I can't access the logs as the original team has been dropped to free status.

you can get both gists here: https://gist.github.com/minniel/

jham1 commented Mar 12, 2017

Thanks for this! Any thoughts on adding extra arguments that allow you to specify channel ID's to include / exclude from the output?

Thanks for making this available! It helped me out today :D FYI for others, this requires using the "legacy Slack API tokens" https://api.slack.com/custom-integrations/legacy-tokens .

Benoit99 commented Jul 21, 2017

Hi there,
When I go to run this I run into throttling issues from Slack. It starts getting the channel history and then part way through the list of channels it errors out with "requests.exceptions.HTTPError: 429 Client Error: Too Many Requests for url: https://slack.com/api/channels.history?channel=C24KKPRT6&latest=1485907199&oldest=1483228800&count=300&inclusive=0&unreads=0&token=...."
Looks like I'm exceeding what Slack will let me pull down at once. Is there any way to work around this? For example, by avoiding archived channels, skipping messages from bots, running the requests in small batches with built-in pauses, or something else?
Thanks!

Hey, does this include exporting reactions along with the messages?
Also wondering if compliance-level tier is required to get DMs with this, i.e. Plus level ? Thanks

f0ster commented Aug 4, 2017

@Benoit99 I have a hacked fix for that here, https://gist.github.com/f0ster/ab3b8bc748c0779a53ceed11d46b1303 . It basically has a configured wait/backoff and will keep retrying as many times as you specify (wrapped around each call to slack /getHistory)

Here it is set to try every 2 seconds up to 10000x for each URI resource it fails on.

The slack docs say they rate limit to 1req/sec

pavyer commented Aug 10, 2017

The script also has a skipChannels option which skips Channels and exports only direct messages and private channels. Private channels include multi person direct messages.
python slack_history.py --token='my-secret-token' --skipChannels

What is the syntax to export a specific channel? I don't need all of the messages from all of my company's slack account to be export just a single channel?

cueo commented Oct 27, 2017

The token is the client secret or the verification token?

I get slacker.Error: invalid_auth for both.

geosava commented Oct 27, 2017

for anyone having slacker.Error: invalid_auth please try without the '' around your token.

python slack_history.py --token=my-secret-token

It worked for me.

geosava commented Oct 27, 2017

Is there anywhere any tool that will help me physically read the exported files from this program?

I have found this: https://github.com/hfaran/slack-export-viewer
But it works only for slack's export zip structure.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment