Skip to content

Instantly share code, notes, and snippets.

@noahwilliamsson
Created April 5, 2019 23:34
Show Gist options
  • Save noahwilliamsson/f9817acb4c16ace8a493be0c24178a9f to your computer and use it in GitHub Desktop.
Save noahwilliamsson/f9817acb4c16ace8a493be0c24178a9f to your computer and use it in GitHub Desktop.
Parse Slack's HTML formatted audit access logs to JSON
#!/usr/bin/env python
#
# Parse Slack's HTML formatted audit access logs (https://theteam.slack.com/admin/logs)
# ...because structured audit logs are apparently an enterprise feature
#
# -- noah@hack.se
#
#
# 1. Open the browser's developer console
# 2. Open https://your-unique-subdomain.slack.com/admin/logs?page=1 and note how many pages are available (e.g. 2175)
# 3. In the Network tab, right-click the request for that HTML page and select "Copy as cURL"
# 4. Run 'for i in $(seq 2175); do sleep 1; curl ...?page=$i ... ; done' to download HTML logs
# 5. Parse the data with the below command
#
# Usage:
# ./slack-parse-html-logs.py [-json] <1.html> [2.html, ..]
#
# Examples:
# ls -1 *.html |sort -n| xargs python slack-parse-html-logs.py > log.txt
# ls -1 *.html |sort -n| xargs python slack-parse-html-logs.py -json > log.json
#
from __future__ import print_function
import sys
import re
import json
from dateutil.parser import parse
from xml.sax.saxutils import unescape
def parse_entry(e, day):
    """Parse one access-log entry (an HTML fragment) into a dict.

    Args:
        e: Inner HTML of a single log-entry ``<p>`` element.
        day: Text of the day heading the entry is listed under. It is joined
            with the entry's time of day and passed to
            ``dateutil.parser.parse``.

    Returns:
        A dict with the keys ``name``, ``user``, ``ts``, ``ip``, ``netid``,
        ``ua`` and ``client``. Fields that could not be extracted stay
        ``None``. ``ts`` is always a string, so the result can be passed
        straight to ``json.dumps``.

    Extraction problems are not raised; the partial result and the offending
    fragment are written to stderr and the partial result is returned.
    """
    failures = []
    obj = {
        'name': None,
        'user': None,
        # Stored as a string so the dict stays JSON-serializable even when
        # the entry carries no time of day (a datetime would make
        # json.dumps raise TypeError).
        'ts': str(parse(day)),
        'ip': None,
        'netid': None,
        'ua': None,
        'client': None,
    }

    m = re.findall(r'class="bold">(.*?)<', e)
    if m:
        obj['name'] = m[0]

    m = re.findall(r'href="/team/(U\w+)"', e)
    if m:
        obj['user'] = m[0]

    m = re.findall(r'<br>([0-9:APM ]+)', e)
    if m:
        # This will be the browser's local time
        obj['ts'] = str(parse(' '.join([day, m[0]])))
    else:
        failures.append('timestamp')

    m = re.findall(r'IP:? ([0-9a-fA-F:.]+)\s*(\((.*?)\))?\.(\s+There.*)?\s*<', e)
    # [('30.0.0.30', '(United States; DoD)', 'United States; DoD', ' There was one additional login from this device, at 7:46 PM. ')]
    if m:
        obj['ip'] = m[0][0]
        obj['netid'] = m[0][2].strip() if m[0][2] else None
    else:
        failures.append('ip address')

    m = re.findall(r'<abbr title="([^"]*)".*?>.*?([\w\s()]+)?</abbr>(<a href="([^"]+)">([^<]+))?', e)
    # [('com.tinyspeck.chatlyio/19.3.2 (iPhone; iOS 11.4.1; Scale/2.00)', ' iOS App (iPhone)', '', '', '')]
    # [('node-fetch/1.0 (+https://github.com/bitinn/node-fetch)', '', '<a href="/apps/A2RPP3NFR">Jira Cloud', '/apps/A2RPP3NFR', 'Jira Cloud')]
    if m:
        # Prefer the label inside <abbr>; fall back to the linked app name.
        obj['client'] = m[0][1].strip() if m[0][1] else m[0][4]
        obj['ua'] = unescape(m[0][0]) if m[0][0] else obj['client']
    else:
        failures.append('useragent')

    if failures:
        # Report every field that failed, not just the last one.
        print(json.dumps(obj), file=sys.stderr)
        print('PARSE ERROR ({})'.format(', '.join(failures)), e, file=sys.stderr)
    return obj
def process_file(filename, as_json=None):
    """Parse one saved access-log HTML page and print its entries to stdout.

    Args:
        filename: Path of the saved HTML page.
        as_json: When true, print one JSON object per entry; when false,
            print one tab-separated line per entry. When ``None`` (the
            default) the module-level ``json_output`` flag is used if it
            exists, otherwise text output is produced.

    Pages without a logs section, and entries that appear before any day
    heading, are reported on stderr and skipped instead of raising.
    """
    if as_json is None:
        # The command-line entry point sets json_output as a module global;
        # it does not exist when this module is merely imported.
        as_json = globals().get('json_output', False)

    with open(filename, 'r') as f:
        html = f.read()

    sections = re.findall(
        r'<section class="tab_pane selected" data-tab="logs">(.*?)</section>',
        html, re.MULTILINE | re.DOTALL)
    if not sections:
        print('{}: no access log section found, skipping'.format(filename),
              file=sys.stderr)
        return

    day = None
    for div in re.findall(r'<div[^>]+>(.*?)</div>', sections[0]):
        m = re.findall(r'<p class="bold no_bottom_margin">(.*?)</p>', div)
        if m:
            # A day heading: remember it for the entries that follow.
            day = m[0]
            continue

        entries = re.findall(
            r'<p style="padding-left: 2.25rem;" class="position_relative">(.*?)</p>',
            div)
        if entries and day is None:
            # Without a day heading there is no date to attach to the entry.
            print('{}: {} entries found before any day heading, skipping'
                  .format(filename, len(entries)), file=sys.stderr)
            continue

        for e in entries:
            obj = parse_entry(e, day)
            if as_json:
                print(json.dumps(obj))
            else:
                print('{}\t{}\t{}\t{} ({})\t{}'.format(obj['ts'], obj['client'], obj['name'], obj['ip'], obj['netid'], obj['ua']))
def parse_args(argv):
    """Split command-line arguments into the JSON flag and the file list.

    Args:
        argv: Arguments after the program name.

    Returns:
        A ``(json_output, files)`` tuple. ``json_output`` is true only when
        the first argument is exactly ``-json`` or ``--json``; a file whose
        name merely contains "-json" is treated as a file.
    """
    files = list(argv)
    json_output = False
    if files and files[0] in ('-json', '--json'):
        json_output = True
        files = files[1:]
    return json_output, files


if __name__ == '__main__':
    # json_output is kept as a module-level global because process_file
    # reads it to choose the output format.
    json_output, files = parse_args(sys.argv[1:])
    for fn in files:
        process_file(fn)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment