Skip to content

Instantly share code, notes, and snippets.

@zaburo-ch
Created August 21, 2018 11:10
Show Gist options
  • Save zaburo-ch/02d99cd53f5e9d4f2fdebc104df014bf to your computer and use it in GitHub Desktop.
Save zaburo-ch/02d99cd53f5e9d4f2fdebc104df014bf to your computer and use it in GitHub Desktop.
Utility module
import tables as tb
import numpy as np
import pandas as pd
import os
import json
import datetime
import pickle
import inspect
import requests
# path
# Directory prefixes. OUTPUT_DIR is joined to run ids by plain string
# concatenation in OutputManager.get_path, so the trailing '/' matters.
INPUT_DIR = "data/input/"
OUTPUT_DIR = "data/output/"
WORKING_DIR = "data/working/"
# NOTE(review): presumably the number of rows in the training set -- confirm.
# It is not referenced anywhere in the visible part of this file.
n_train = 4459
# Placeholder value; incoming_webhook() posts to this URL, so it must be
# replaced with a real Slack incoming-webhook URL before that function is used.
slack_url = 'your incoming webhook url'
def incoming_webhook(text, is_code=False):
    """Post a message to the Slack incoming webhook at ``slack_url``.

    The message is sent under a username derived from the current machine:
    the node name from ``os.uname()`` where available, otherwise
    ``USERNAME@COMPUTERNAME`` built from environment variables.

    Args:
        text: Message body to send.
        is_code: When true, wrap ``text`` in a fenced code block so Slack
            renders it in monospace.

    Returns:
        The response object returned by ``requests.post``.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    if is_code:
        text = '```\n' + text + '\n```'
    if hasattr(os, 'uname'):
        username = os.uname()[1]
    else:
        # os.getenv returns None for unset variables, which would make the
        # concatenation raise TypeError; fall back to empty strings instead.
        username = os.getenv('USERNAME', '') + '@' + os.getenv('COMPUTERNAME', '')
    payload = {
        'text': text,
        'username': username,
    }
    # Without a timeout requests waits indefinitely, which would stall the
    # calling job if Slack is unreachable.
    return requests.post(slack_url, data=json.dumps(payload), timeout=10)
# io
def save_array(X, filepath):
    """Store ``X`` in a new PyTables file at ``filepath``.

    ``X`` is converted with ``np.asarray`` and written as a compressed
    array node named ``X`` under the file root (blosc, level 9).
    """
    data = np.asarray(X)
    with tb.open_file(filepath, 'w') as h5:
        element_type = tb.Atom.from_dtype(data.dtype)
        compression = tb.Filters(complib='blosc', complevel=9)
        node = h5.create_carray(
            h5.root, 'X', element_type, data.shape, filters=compression
        )
        # Slice assignment copies the whole array into the on-disk node.
        node[:] = data
def load_array(filepath):
    """Read the root node ``X`` of the PyTables file at ``filepath``.

    Returns the node's contents as a NumPy array; the copy is made while the
    file is still open.
    """
    with tb.open_file(filepath, 'r') as h5:
        stored = h5.root.X
        return np.array(stored)
def save_npy(X, filepath):
    """Convert ``X`` with ``np.asarray`` and write it to ``filepath`` via ``np.save``."""
    np.save(filepath, np.asarray(X))
def load_npy(filepath):
    """Return whatever ``np.load`` reads from ``filepath``."""
    loaded = np.load(filepath)
    return loaded
def save_df(X, filepath):
    """Wrap ``X`` in a ``pd.DataFrame`` and write it to an HDF file.

    The frame is stored under the key ``'table'`` with blosc compression at
    level 9.
    """
    frame = pd.DataFrame(X)
    frame.to_hdf(filepath, key='table', complib='blosc', complevel=9)
def load_df(filepath):
    """Read the object stored under key ``'table'`` in the HDF file at ``filepath``."""
    frame = pd.read_hdf(filepath, key='table')
    return frame
def save_series(X, filepath):
    """Wrap ``X`` in a ``pd.Series`` and write it to an HDF file.

    The series is stored under the key ``'table'`` with blosc compression at
    level 9.
    """
    series = pd.Series(X)
    series.to_hdf(filepath, key='table', complib='blosc', complevel=9)
def load_series(filepath):
    """Read the object stored under key ``'table'`` in the HDF file at ``filepath``."""
    series = pd.read_hdf(filepath, key='table')
    return series
def save_pickle(X, filepath):
    """Serialize ``X`` with ``pickle.dump`` into the file at ``filepath``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(filepath, 'wb') as handle:
        pickle.dump(X, handle)
def load_pickle(filepath):
    """Return the object unpickled from the file at ``filepath``.

    NOTE: ``pickle.load`` can execute arbitrary code; only use this on files
    from a trusted source.
    """
    with open(filepath, 'rb') as handle:
        return pickle.load(handle)
class OutputManager:
    """Manage one run's output directory, its ``params.json`` and its logs.

    The directory is either given explicitly (``dir_path``) or derived from
    ``OUTPUT_DIR`` plus a five-digit run id. It is created lazily on the
    first call to ``get_path`` (and therefore on the first ``print``).
    """

    def __init__(self, params=None, id_=None, dir_path=None, note_slack=False):
        """Record the run configuration.

        Args:
            params: Run parameters written to ``params.json``. Either a
                mapping, or a list of values; for a list, names are
                recovered by matching each value's identity against the
                caller's local variables.
            id_: Run id used as the directory name under ``OUTPUT_DIR``;
                chosen by ``get_newest`` when omitted.
            dir_path: Explicit output directory, overriding ``id_``.
            note_slack: When true, ``print`` also forwards messages through
                ``incoming_webhook``.
        """
        self.id = id_
        self.dir_path = dir_path
        self.note_slack = note_slack
        if params is None:
            self.params = {}
        elif isinstance(params, list):
            # Must be read here, not in a helper: f_back has to be the frame
            # that called the constructor.
            caller_locals = inspect.currentframe().f_back.f_locals
            wanted = {id(v) for v in params}
            self.params = {
                name: v for name, v in caller_locals.items() if id(v) in wanted
            }
        else:
            # Copy so that adding the 'datetime' key below does not modify
            # the caller's own mapping.
            self.params = dict(params)
        now = datetime.datetime.now()
        self.params['datetime'] = now.strftime('%Y%m%d-%H:%M:%S')

    def get_newest(self):
        """Return the next unused run id as a zero-padded five-character string.

        Only entries of ``OUTPUT_DIR`` whose names are exactly five decimal
        digits count as existing runs. Returns ``'00000'`` when there are
        none or when ``OUTPUT_DIR`` does not exist yet.
        """
        try:
            entries = os.listdir(OUTPUT_DIR)
        except FileNotFoundError:
            return '00000'
        # The digit check keeps unrelated five-character names (for example
        # 'a.txt') from reaching int() and raising ValueError.
        used = [int(name) for name in entries
                if len(name) == 5 and name.isdecimal()]
        if not used:
            return '00000'
        return f'{max(used) + 1:0>5}'

    def get_path(self):
        """Return the output directory path, always ending in ``'/'``.

        Resolves the directory on first use, creates it (including missing
        parents) and writes ``params.json`` into it unless that file already
        exists.
        """
        if self.dir_path is None:
            if self.id is None:
                self.id = self.get_newest()
            self.dir_path = OUTPUT_DIR + self.id
        os.makedirs(self.dir_path, exist_ok=True)
        if not self.dir_path.endswith('/'):
            self.dir_path += '/'
        json_path = self.dir_path + 'params.json'
        if self.params is not None and not os.path.exists(json_path):
            # Serialize before opening the file: if a value is not JSON
            # serializable, no truncated params.json is left behind to block
            # a later write.
            payload = json.dumps(self.params, indent=4)
            with open(json_path, 'w') as fp:
                fp.write(payload)
        return self.dir_path

    def print(self, *values, filename=None):
        """Append ``values`` to a log file and echo them to stdout.

        Args:
            *values: Objects to log, formatted as by the builtin ``print``.
            filename: Log file name inside the output directory; defaults
                to ``'log.txt'``.
        """
        if filename is None:
            filename = 'log.txt'
        with open(self.get_path() + filename, 'a') as fp:
            print(*values, file=fp)
        if self.note_slack:
            incoming_webhook(' '.join(map(str, values)), is_code=True)
        print(*values)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment