-
-
Save levelsio/122907e95956602e5c09 to your computer and use it in GitHub Desktop.
<?php
///////////////////// | |
// slack2html | |
// by @levelsio | |
///////////////////// | |
// | |
///////////////////// | |
// WHAT DOES THIS DO? | |
///////////////////// | |
// | |
// Slack lets you export the chat logs (back to the first messages!), even if | |
// you are a free user (and have a 10,000 message limit)
// | |
// This is pretty useful for big chat groups (like mine, #nomads), where you | |
// do wanna see the logs, but can't see them within Slack | |
// | |
// Problem is that Slack exports it as JSON files, which is a bit unusable, | |
// so this script makes it into actual HTML files that look like Slack chats | |
// | |
/////////////////// | |
// INSTRUCTIONS | |
/////////////////// | |
// | |
// Run this script inside the directory of an extracted (!) Slack export zip | |
// e.g. "/tmp/#nomads Slack export Aug 25 2015" like this: | |
// MacBook-Pro:#nomads Slack export Aug 25 2015 mbp$ php slack2html.php | |
// | |
// It will then make two dirs next to (not inside) the export directory:
// ../slack2html/json
// ../slack2html/html
// | |
// In the JSON dir it will put each channels chat log combined from all the | |
// daily logs that Slack outputs (e.g. /channel/2014-11-26.json) | |
// | |
// In the HTML dir it will generate HTML files with Slack's typical styling. | |
// It will also create an index.html that shows all channels | |
// | |
/////////////////// | |
// FEEDBACK | |
/////////////////// | |
// | |
// Let me know any bugs by tweeting me @levelsio | |
// | |
// Hope this helps! | |
// | |
// Pieter @levelsio | |
// | |
///////////////////// | |
// Slack exports can be large: allow this run up to 1 GB of memory.
ini_set('memory_limit', '1024M');
// Message timestamps are formatted with date(), so pin the timezone to UTC.
date_default_timezone_set('UTC');
// mbstring is an optional extension. Without this guard the script dies with
// "Call to undefined function mb_internal_encoding()" on installs lacking it,
// even though no other mb_* function is used.
if (function_exists('mb_internal_encoding')) {
    mb_internal_encoding("UTF-8");
}
// Report fatal errors only; warnings and notices are hidden.
error_reporting(E_ERROR);

// <config>
// CSS inlined into every generated page (channel pages and index.html).
$stylesheet="
* {
font-family:sans-serif;
}
body {
text-align:center;
padding:1em;
}
.messages {
width:100%;
max-width:700px;
text-align:left;
display:inline-block;
}
.messages img {
background-color:rgb(248,244,240);
width:36px;
height:36px;
border-radius:0.2em;
display:inline-block;
vertical-align:top;
margin-right:0.65em;
}
.messages .time {
display:inline-block;
color:rgb(200,200,200);
margin-left:0.5em;
}
.messages .username {
display:inline-block;
font-weight:600;
line-height:1;
}
.messages .message {
display:inline-block;
vertical-align:top;
line-height:1;
width:calc(100% - 3em);
}
.messages .message .msg {
line-height:1.5;
}
";
// </config>
// <compile daily logs into single channel logs>
// A Slack export holds one directory per channel with one JSON file per day
// (e.g. /channel/2014-11-26.json). Each channel's daily files are merged into
// a single <channel>.json inside $jsonDir. Channels whose combined file
// already exists are skipped, so the script can be re-run cheaply.
$files=scandir(__DIR__);
$baseDir=__DIR__.'/../slack2html';
$jsonDir=$baseDir.'/'.'json';
// Recursive mkdir creates $baseDir and $jsonDir in one call; nothing below
// can work without them, so stop with a clear message if that fails.
if(!is_dir($jsonDir) && !mkdir($jsonDir,0777,true) && !is_dir($jsonDir)) {
    echo 'Could not create output directory '.$jsonDir."\n";
    exit(1);
}
foreach($files as $channel) {
    if($channel=='.' || $channel=='..') continue;
    // Test the absolute path: a bare name would be resolved against the
    // current working directory, which need not be the export directory.
    $channelDir=__DIR__.'/'.$channel;
    if(!is_dir($channelDir)) continue;

    $channelJsonFile=$jsonDir.'/'.$channel.'.json';
    if(file_exists($channelJsonFile)) {
        echo "JSON already exists ".$channelJsonFile."\n";
        continue;
    }

    $chats=array();
    echo '====='."\n";
    echo 'Combining JSON files for #'.$channel."\n";
    echo '====='."\n";

    // scandir() returns names in ascending order, so date-named files are
    // appended chronologically.
    foreach(scandir($channelDir) as $date) {
        $dateFile=$channelDir.'/'.$date;
        // Only the daily *.json logs are wanted; skip '.', '..', nested
        // directories and stray files such as .DS_Store.
        if(is_dir($dateFile) || substr($date,-5)!=='.json') continue;
        echo '.';
        $messages=json_decode(file_get_contents($dateFile),true);
        if(empty($messages) || !is_array($messages)) continue;
        foreach($messages as $message) {
            $chats[]=$message;
        }
    }
    echo "\n";

    $encodedChats=json_encode($chats);
    if($encodedChats===false || file_put_contents($channelJsonFile,$encodedChats)===false) {
        // Do not claim success when nothing usable was written.
        echo 'Could not write '.$channelJsonFile."\n";
        continue;
    }
    echo number_format(count($chats)).' messages exported to '.$channelJsonFile."\n";
}
// </compile daily logs into single channel logs>
// <load users file>
// Index the export's users.json by user id so that messages and <@ID>
// mentions can be resolved to a user name and avatar.
$usersFile=__DIR__.'/'.'users.json';
$users=is_file($usersFile) ? json_decode(file_get_contents($usersFile),true) : null;
// A missing or malformed file yields an empty lookup table instead of a
// foreach over a non-array.
if(!is_array($users)) $users=array();
$usersById=array();
foreach($users as $user) {
    // Entries without an id cannot be looked up and would all share the '' key.
    if(!isset($user['id'])) continue;
    $usersById[$user['id']]=$user;
}
// </load users file>
// <load channels file>
// Index the export's channels.json by channel id so that <#ID> mentions can
// be resolved to a channel name.
$channelsFile=__DIR__.'/'.'channels.json';
$channels=is_file($channelsFile) ? json_decode(file_get_contents($channelsFile),true) : null;
// A missing or malformed file yields an empty lookup table instead of a
// foreach over a non-array.
if(!is_array($channels)) $channels=array();
$channelsById=array();
foreach($channels as $channel) {
    // Entries without an id cannot be looked up and would all share the '' key.
    if(!isset($channel['id'])) continue;
    $channelsById[$channel['id']]=$channel;
}
// </load channels file>
// <generate html from channel logs>
/**
 * Turns Slack's <...> control sequences in a message text into readable HTML.
 *
 * Handles user mentions (<@U38A3DE9> or <@U38A3DE9|levelsio>), channel
 * mentions (<#C38A3DE9> or <#C38A3DE9|chiang-mai>) and links
 * (<http://url> or <http://url|label>).
 *
 * NOTE(review): assumes the export already HTML-escapes &, < and > inside
 * message text, so the remaining text is not escaped again - confirm.
 *
 * @param string $text         Raw "text" field of an exported message.
 * @param array  $usersById    users.json entries keyed by user id.
 * @param array  $channelsById channels.json entries keyed by channel id.
 * @return string Text with mentions replaced by @name / #name and links by <a> tags.
 */
function slack2htmlFormatText($text,$usersById,$channelsById) {
    // <@U38A3DE9> or <@U38A3DE9|levelsio> => @levelsio
    if(preg_match_all('/<@([^>|]+)(?:\|([^>]*))?>/',$text,$userMatches,PREG_SET_ORDER)) {
        foreach($userMatches as $match) {
            // The optional "|label" group is absent from $match when unused.
            $label=isset($match[2]) ? $match[2] : '';
            if($label==='' && isset($usersById[$match[1]]['name'])) {
                $label=$usersById[$match[1]]['name'];
            }
            // Unknown user: show the raw id rather than a bare "@".
            if($label==='') $label=$match[1];
            $text=str_replace($match[0],'@'.$label,$text);
        }
    }

    // <#C38A3DE9> or <#C38A3DE9|chiang-mai> => #chiang-mai
    if(preg_match_all('/<#([^>|]+)(?:\|([^>]*))?>/',$text,$channelMatches,PREG_SET_ORDER)) {
        foreach($channelMatches as $match) {
            $label=isset($match[2]) ? $match[2] : '';
            if($label==='' && isset($channelsById[$match[1]]['name'])) {
                $label=$channelsById[$match[1]]['name'];
            }
            // Unknown channel: show the raw id rather than a bare "#".
            if($label==='') $label=$match[1];
            $text=str_replace($match[0],'#'.$label,$text);
        }
    }

    // <http://url> or <http://url|label> => clickable link showing the URL
    if(preg_match_all('/<(http[^>|]*)(?:\|[^>]*)?>/',$text,$linkMatches,PREG_SET_ORDER)) {
        foreach($linkMatches as $match) {
            // Only the double quote needs escaping inside href="...".
            $url=str_replace('"','&quot;',$match[1]);
            $text=str_replace($match[0],'<a href="'.$url.'">'.$url.'</a>',$text);
        }
    }

    return $text;
}

$htmlDir=$baseDir.'/'.'html';
if(!is_dir($htmlDir)) mkdir($htmlDir);
$channels=scandir($jsonDir);
$channelNames=array();
$mostRecentChannelTimestamps=array();
foreach($channels as $channel) {
    // Only the combined <channel>.json files are processed; testing the full
    // path keeps the check independent of the current working directory.
    if(is_dir($jsonDir.'/'.$channel) || substr($channel,-5)!=='.json') continue;

    // Strip only the trailing ".json" so names containing ".json" stay intact.
    $channelName=substr($channel,0,-5);
    $channelHtmlFile=$htmlDir.'/'.$channelName.'.html';

    // An existing page is not rendered again, but its channel must still be
    // listed in index.html, which is rewritten on every run.
    $htmlExists=file_exists($channelHtmlFile);
    if($htmlExists) {
        echo "HTML already exists ".$channelHtmlFile."\n";
    } else {
        array_push($channelNames,$channelName);
        echo '====='."\n";
        echo 'Generating HTML for #'.$channelName."\n";
        echo '====='."\n";
    }

    $messages=json_decode(file_get_contents($jsonDir.'/'.$channel),true);
    if(empty($messages) || !is_array($messages)) continue;

    $mostRecentChannelTimestamp=0;
    // Message text is kept as UTF-8 (emoji, non-Latin scripts), so the page
    // has to declare that charset.
    $htmlMessages='<html><head><meta charset="utf-8"><style>'.$stylesheet.'</style></head><body><div class="messages">';
    foreach($messages as $message) {
        if(empty($message)) continue;

        // "ts" looks like "1440497000.000123"; whole seconds are enough here.
        $time=isset($message['ts']) ? (int)$message['ts'] : 0;
        if($time>$mostRecentChannelTimestamp) {
            $mostRecentChannelTimestamp=$time;
        }

        // For an existing page only the newest timestamp is needed.
        if($htmlExists) continue;
        // Skip messages without text, but keep a literal "0" message.
        if(!isset($message['text']) || $message['text']==='') continue;
        echo '.';

        // Bot messages have no "user" key.
        // NOTE(review): falling back to the message's own "username" field
        // assumes bot messages carry one - confirm against a real export.
        $user=(isset($message['user']) && isset($usersById[$message['user']])) ? $usersById[$message['user']] : array();
        $userName=isset($user['name']) ? $user['name'] : (isset($message['username']) ? $message['username'] : '');
        $avatarUrl=isset($user['profile']['image_72']) ? $user['profile']['image_72'] : '';

        $text=slack2htmlFormatText($message['text'],$usersById,$channelsById);

        // change "@levelsio has joined the channel" into "has joined #channel"
        // (the user name is already shown on the line above the text)
        if(stripos($text,'has joined the channel')!==false) {
            $text=str_replace('the channel','#'.$channelName,$text);
            if($userName!=='') {
                $text=str_replace('@'.$userName.' ','',$text);
            }
        }

        $htmlMessages.='<div><img src="'.htmlspecialchars($avatarUrl,ENT_QUOTES,'UTF-8').'" /><div class="message"><div class="username">'.htmlspecialchars($userName,ENT_QUOTES,'UTF-8').'</div><div class="time">'.date('Y-m-d H:i',$time).'</div><div class="msg">'.$text."</div></div></div><br/>\n";
    }
    $mostRecentChannelTimestamps[$channelName]=$mostRecentChannelTimestamp;
    if($htmlExists) continue;

    $htmlMessages.='</div></body></html>';
    file_put_contents($channelHtmlFile,$htmlMessages);
    echo "\n";
}
// Newest channel first. arsort() keeps the keys; asort() followed by
// array_reverse() would renumber channels whose names are numeric.
arsort($mostRecentChannelTimestamps);
// </generate html from channel logs>
// <make index html>
// index.html links to every channel page in the order of
// $mostRecentChannelTimestamps, showing each channel's latest message time.
$html='<html><head><meta charset="utf-8"><style>'.$stylesheet.'</style></head><body><div class="messages">';
foreach($mostRecentChannelTimestamps as $channel => $timestamp) {
    // Array keys that look like integers arrive as int; normalise to string.
    $channel=(string)$channel;
    // The timestamp may be a "seconds.micros" string; date() wants whole seconds.
    $html.='<a href="./'.rawurlencode($channel).'.html">#'.htmlspecialchars($channel,ENT_QUOTES,'UTF-8').'</a> '.date('Y-m-d H:i',(int)$timestamp).'<br/>'."\n";
}
$html.='</div></body></html>';
file_put_contents($htmlDir.'/index.html',$html);
// </make index html>
?> |
Does that work for Windows?
Great post and works like a charm. Well done Sir! 👍
I had a memory problem when running it on my Mac. The error was:
Fatal error: Allowed memory size of 1073741824 bytes exhausted (tried to allocate 32 bytes) in /Users/c2349192/workspace/slack-archives/Team Awesome Slack export Mar 20 2017/slack2html.php on line 186
I increased the memory limit on line 50 and it completed with no problems.
ini_set('memory_limit', '2048M');
For windows:,
- Install Wamp Server (http://www.wampserver.com/en/)
- In the install directory, find the location of php.exe. You'll need reference this path later. (i.e "C:\Wamp64\bin\php\php7.0.0\php.exe" )
- Create a folder on your desktop called 'Slack'. Download the Slack message history zip. Extract to 'Slack' folder. You'll need to reference this path later. (i.e "C:\Users\YourName\Desktop\Slack\Slack_Project_Name Slack export Sep 1 2017"
- Download the file from this repository (gistfile1.txt) and store it in the same folder as Step 3.
- Rename gistFile1.txt to GenerateSlackReport.php
- Open the command prompt, (Start->Run->Cmd) (You may need to find this program and right click to Run as Administrator)
- Change directory to the location of Step 3. (ie. "cd C:/Users/YourName/Desktop/Slack/Slack_Project_Name Slack export Sep 1 2017"
- In the command prompt, execute this command, replacing the php.exe location with your own: "C:\Wamp64\bin\php\php7.0.0\php.exe" GenerateSlackReport.php
Reports are generated now. Hope this helps.
Hi
As far as I see, this script does not create code to see pictures on the local page, it renders only a link to the slack url of the picture, doesn't it?
Indeed, it would be nice to have it be able to download pictures in a local folder.
Any chance to see a improved code (which is indeed very very useful)
Thanks Cheers
mario
Hey, as I don't want to install PHP on my system I just used a basic PHP docker image.
Requirements:
- Docker Engine (docker-ce is great)
Steps:
- Create a temporary dir
mkdir $HOME/slack2html/
- Decompress zip file with messages in that temporary folder
cd $HOME/slack2html/ ; unzip your_slack_file.zip
- Download gist file as slack2html.php
wget -O slack2html.php https://gist.githubusercontent.com/levelsio/122907e95956602e5c09/raw/6ea53ecfb936f4f0fbbcbb74bb7b6db5030ec64b/gistfile1.txt
- Run Docker Container with PHP
docker run -ti --rm -v $HOME/slack2html:/mnt/slack_data \
-v $HOME/slack2html_output:/mnt/slack2html/ \
php:7 bash -c 'cd /mnt/slack_data && php slack2html.php'
- Check result at output folder
ls -al $HOME/slack2html_output/html/
firefox $HOME/slack2html_output/html/index.html
Clean after yourself
If you are not a PHP-docker-image user, you may wanna remove that previously downloaded image that takes (at this time) ~350MiB of storage.
docker rmi php:7
Results
__ ___ _____ ___ ___ _ __ ___ ___ _ __ ___ ___ ___
/ _` \ \ /\ / / _ \/ __|/ _ \| '_ ` _ \ / _ \ '_ \ / _ \/ __/ __|
| (_| |\ V V / __/\__ \ (_) | | | | | | __/ | | | __/\__ \__ \
\__,_| \_/\_/ \___||___/\___/|_| |_| |_|\___|_| |_|\___||___/___/
@mario6097, you may wanna check my gist for replacing those avatars with locally downloaded avatars.
It's written in python as it's been years since I don't code in PHP.
Im noticing that this script skips messages where "text": null. I have many messages where this is the case, usually from a bot. There is message information in the attachment though. Would it be possible to have these messages included?
also, would it be possible to keep each day in its own file rather than combining them? a single file for a whole channel can be very unwieldy
Great script altogether.
Try changing <? to <?php
@levelsio I also found I needed to insert the top line as:
#!/usr/bin/env php
On UNIX.
👍 Thank you!
Also needed to add 'php' to the top line, and install php-mbstring
to resolve that mb_internal_encoding() error mentioned earlier
Very nice! This was extremely helpful. Thanks for sharing it!
sadly doesn’t seem to handle emoji reactions .. anyone implemented this already?
Would it be possible to have it maintain the one day per channel structure that Slack provides rather than combining multiple days?
There are updated tools for this -- https://github.com/zach-snell/slack-export and https://github.com/hfaran/slack-export-viewer
I used the slack-export which worked great but slack-export-viewer keeps giving me these errors:
MacBook-Pro-17:slack-export-master davis$ slack-export-viewer -p 80 -I 192.168.x.x --debug -z test-export.zip.zip
WARNING: DEBUG MODE IS ENABLED!
test-export.zip.zip extracting to /var/folders/p1/p83kf_2n3l37p7yxgmn3113m0000gn/T/_slackviewer/4d730203a1403212f1458d8709540e73a056eec3...
test-export.zip.zip extracted to /var/folders/p1/p83kf_2n3l37p7yxgmn3113m0000gn/T/_slackviewer/4d730203a1403212f1458d8709540e73a056eec3
Traceback (most recent call last):
File "/usr/local/bin/slack-export-viewer", line 8, in
sys.exit(main())
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 829, in call
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/slackviewer/main.py", line 61, in main
configure_app(app, archive, channels, no_sidebar, no_external_references, debug)
File "/usr/local/lib/python3.7/site-packages/slackviewer/main.py", line 25, in configure_app
top.groups = reader.compile_groups()
File "/usr/local/lib/python3.7/site-packages/slackviewer/reader.py", line 42, in compile_groups
return self._create_messages(group_names, group_data)
File "/usr/local/lib/python3.7/site-packages/slackviewer/reader.py", line 176, in _create_messages
chats = self._build_threads(chats)
File "/usr/local/lib/python3.7/site-packages/slackviewer/reader.py", line 199, in _build_threads
for reply in message._message['replies']:
KeyError: 'replies'
I keep getting same KeyError: 'replies'
I made a PowerShell version:
https://github.com/danielmbond/convert-slack-exports-to-html/tree/master
Here's a challenge - get the PHP script to include bot messages as well. The idea being to have a readable account of alerts dumped into a channel by a bot along with all user posts in response. Would love to see this.
Thank you for the instructions! May somebody please share an example of how the HTML file looks like after the conversion?
The script worked for me on MacOS once I changed the first line to <?php
, thought I might highlight that so it can be patched in.
All I get in the HTML folder after running php slack2html.php is this HTML:
<style> * { font-family:sans-serif; } body { text-align:center; padding:1em; } .messages { width:100%; max-width:700px; text-align:left; display:inline-block; } .messages img { background-color:rgb(248,244,240); width:36px; height:36px; border-radius:0.2em; display:inline-block; vertical-align:top; margin-right:0.65em; } .messages .time { display:inline-block; color:rgb(200,200,200); margin-left:0.5em; } .messages .username { display:inline-block; font-weight:600; line-height:1; } .messages .message { display:inline-block; vertical-align:top; line-height:1; width:calc(100% - 3em); } .messages .message .msg { line-height:1.5; } </style>
Is there a way to add a feature that includes threaded messages? This tool works fine even since the threaded message update, but it sorts threaded messages by time in the whole channel vs. with the original message.