Skip to content

Instantly share code, notes, and snippets.

@Netherdrake
Last active March 10, 2019 18:18
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Netherdrake/938f28d1065364711cd36fe776df4d75 to your computer and use it in GitHub Desktop.
Save Netherdrake/938f28d1065364711cd36fe776df4d75 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from scipy.stats.mstats import gmean, hmean
def cv_store(desc, kaggle, tcv, kcv, valid, filename='cvhist.csv'):
    """
    Record the latest CV and LB results in a CSV history file.

    A row holding the current UTC time, the description, the scores and
    their summary statistics is appended to the history stored in
    ``filename`` (the history starts fresh when the file is missing or
    empty). The history is then sorted newest-first, de-duplicated on
    (description, kaggle) and written back to ``filename``.

    Args:
        desc: Submission description
        kaggle: kaggle public LB score
        tcv: sequence of time-series CV scores
        kcv: sequence of K-Fold CV scores
        valid: validation score
        filename: path of the CSV history file

    Returns:
        latest historic dataframe, newest entries first
    """
    tcv_col = [f'tcv{i}' for i in range(len(tcv))]
    kcv_col = [f'kcv{i}' for i in range(len(kcv))]
    columns = [
        'ts', 'description', 'kaggle', 'valid',
        'tcv_mean', 'tcv_std', 'gmean',
        *tcv_col, *kcv_col,
    ]
    new_row = [
        # Naive UTC timestamp, so it mixes cleanly with the naive values
        # parsed back from an existing history file.
        pd.Timestamp.now(tz='UTC').tz_localize(None),
        desc, kaggle, valid,
        np.mean(tcv), np.std(tcv),
        # Geometric mean of the two per-scheme mean scores.
        gmean([np.mean(tcv), np.mean(kcv)]),
        *tcv, *kcv,
    ]
    latest = pd.DataFrame([new_row], columns=columns)

    try:
        history = pd.read_csv(filename)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No usable history yet: start one from the new row. Any other
        # failure propagates instead of silently overwriting the file.
        df = latest
    else:
        history['ts'] = pd.to_datetime(history['ts'])
        # Columns missing on either side (e.g. a different number of
        # scores) are filled with NaN by concat.
        df = pd.concat([history, latest], ignore_index=True, sort=False)

    df['ts'] = pd.to_datetime(df['ts'])
    df.sort_values('ts', ascending=False, inplace=True)
    # Rows are newest-first here, so keep='last' retains the oldest row of
    # each (description, kaggle) pair.
    df = df.drop_duplicates(subset=['description', 'kaggle'], keep='last')
    df.to_csv(filename, index=False)
    return df
def cv_hist(filename='cvhist.csv'):
    """
    Load the stored CV/LB history, newest entries first.

    Args:
        filename: path of the CSV history file

    Returns:
        the history dataframe sorted by ``ts`` in descending order, or an
        empty dataframe (after printing a notice) when the file is missing
        or empty
    """
    try:
        df = pd.read_csv(filename)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print('History not available.')
        # Return an empty frame so the caller still gets a dataframe
        # instead of an UnboundLocalError.
        return pd.DataFrame()
    df['ts'] = pd.to_datetime(df['ts'])
    df.sort_values('ts', ascending=False, inplace=True)
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment