Skip to content

Instantly share code, notes, and snippets.

@y16ra
Created December 1, 2015 10:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save y16ra/1526ec9a479d589ef71f to your computer and use it in GitHub Desktop.
Save y16ra/1526ec9a479d589ef71f to your computer and use it in GitHub Desktop.
slackからexportしたデータをMongoDBに取り込む
#!/usr/bin/env python
# -*- coding: utf_8 -*-
import json
import sys, os, datetime
import pymongo
# Python 2 only: make UTF-8 the interpreter-wide default encoding so that
# implicit str/unicode conversions of non-ASCII text do not raise
# UnicodeDecodeError. sys.setdefaultencoding() is removed from the sys module
# at interpreter start-up, so the module has to be reloaded to get it back.
# Python 3 has neither the reload() builtin nor setdefaultencoding() and
# already defaults to UTF-8, so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf_8')
# Root directory of the unpacked Slack export.
# NOTE(review): the value looks like a placeholder - set it before running.
BASE_DIR = '/Users/WORKDIR_PATH'

# Build a {user id: user name} lookup from the export's users.json.
# os.path.join() supplies the path separator, so BASE_DIR works with or
# without a trailing slash (plain string concatenation produced
# '/Users/WORKDIR_PATHusers.json' for the value above).
with open(os.path.join(BASE_DIR, 'users.json'), 'r') as f:
    userData = json.load(f)
users_dict = {user['id']: user['name'] for user in userData}
# Connect to the MongoDB server running on this machine.
client = pymongo.MongoClient(host='localhost', port=27017)
# Select the database that receives the imported Slack data.
db = client['slack_data']
# Base names (extension stripped) of export files that are never imported.
exclude_files = (
    # 'users' is deliberately not excluded: its records are stored unmodified.
    'channels',
    'integration_logs',
)
# Message subtypes that are not imported.
exclude_post_type = (
    'channel_join',
    'channel_purpose',
)
# Collection.update() was deprecated in PyMongo 3.0 and removed in 4.0;
# replace_one() is its replacement but does not exist in PyMongo 2.x.
# hasattr() cannot be used to detect it because attribute access on a
# Collection returns a sub-collection for any name, so check the version.
_USE_REPLACE_ONE = pymongo.version_tuple[0] >= 3


def _collection_name(root, file_):
    """Return the name of the collection that receives ``file_``.

    Files inside a sub-directory of BASE_DIR use that directory's path
    relative to BASE_DIR (presumably the channel name in a Slack export).
    Files located directly in BASE_DIR use their own file name, extension
    included. The result does not depend on whether BASE_DIR ends with a
    path separator.
    """
    relative_dir = os.path.relpath(root, BASE_DIR)
    if relative_dir == os.curdir:
        return file_
    return relative_dir


def _build_post(data, filename):
    """Return the document stored for one message ``data`` from ``filename``.

    ``user_id`` is the message's 'user' value, falling back to 'username'
    and then to the literal 'none'. ``user`` is the name looked up in
    users_dict, with the same 'username' / 'none' fallbacks. The untouched
    message is kept under ``raw``.
    """
    if 'user' in data and data['user'] in users_dict:
        user_name = users_dict[data['user']]
    else:
        user_name = data.get('username', 'none')
    # fromtimestamp() without a tz argument converts to the local time zone.
    posted_at = datetime.datetime.fromtimestamp(float(data['ts']))
    return {
        'json_filename': filename,
        'user_id': data.get('user', data.get('username', 'none')),
        'user': user_name,
        'text': data.get('text', 'none'),
        'ts': posted_at.strftime("%Y-%m-%d %H:%M:%S"),
        'raw': data,
    }


def _upsert_document(col, document):
    """Store ``document`` in ``col`` unless an identical one already exists.

    The document itself is used as the query filter, which makes re-running
    the import over the same export idempotent.
    """
    if _USE_REPLACE_ONE:
        col.replace_one(document, document, upsert=True)
    else:
        col.update(document, document, upsert=True)


for root, _dirs, files in os.walk(BASE_DIR):
    for file_ in files:
        filename, ext = os.path.splitext(file_)
        print('now processing... :' + filename)
        if ext != '.json' or filename in exclude_files:
            continue
        # Look the collection up only for files that are actually imported,
        # so stray files whose names are not valid collection names (for
        # example '.DS_Store') cannot abort the run.
        col = db[_collection_name(root, file_)]
        # Read one Slack data file; it holds a JSON array of records.
        with open(os.path.join(root, file_), 'r') as f:
            jsonData = json.load(f)
        for data in jsonData:
            if data.get('subtype') in exclude_post_type:
                continue
            if filename == 'users':
                # User records are stored exactly as exported.
                _upsert_document(col, data)
                continue
            _upsert_document(col, _build_post(data, filename))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment