Skip to content

Instantly share code, notes, and snippets.

@jeethu

jeethu/evaluation.py

Created Aug 25, 2020
Embed
What would you like to do?
Numerai Model Evaluation
import functools
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.table as table
import numpy as np
import pandas as pd
from scipy.stats import spearmanr
# Tournament identifier; the target and prediction column names are built from it.
TOURNAMENT_NAME = "kazutsugi"
TARGET_NAME = "target_" + TOURNAMENT_NAME
PREDICTION_NAME = "prediction_" + TOURNAMENT_NAME

# Era numbers of the two validation groups (range end is exclusive) and their
# concatenation, val1 first.
VAL1_ERAS = tuple(range(121, 133))
VAL2_ERAS = tuple(range(197, 207))
VAL_ERAS = (*VAL1_ERAS, *VAL2_ERAS)
def score(df, prediction_col):
    """Return the correlation between the target and the ranked predictions.

    The predictions in ``df[prediction_col]`` are converted to percentile
    ranks (ties broken by order of appearance) and then Pearson-correlated
    with the ``TARGET_NAME`` column.

    Args:
        df: DataFrame containing ``TARGET_NAME`` and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.

    Returns:
        The off-diagonal entry of the 2x2 correlation matrix.
    """
    targets = df[TARGET_NAME]
    ranked_predictions = df[prediction_col].rank(pct=True, method="first")
    correlation_matrix = np.corrcoef(targets, ranked_predictions)
    return correlation_matrix[0, 1]
def feature_exposures(df, prediction_col):
    """Return the Spearman correlation of the predictions with each feature.

    A column counts as a feature when its label is a string starting with
    ``"feature"``; columns with non-string labels are skipped.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.

    Returns:
        A 1-D ``numpy`` array with one correlation per feature column, in
        ``df.columns`` order. The array is empty when ``df`` has no feature
        columns.
    """
    feature_names = [
        col for col in df.columns
        if isinstance(col, str) and col.startswith("feature")
    ]
    predictions = df[prediction_col]
    # spearmanr returns (correlation, p-value); only the correlation is kept.
    return np.array(
        [spearmanr(predictions, df[name])[0] for name in feature_names]
    )
def max_feature_exposure(df, prediction_col=PREDICTION_NAME):
    """Return the largest absolute feature exposure of the predictions.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.

    Returns:
        The maximum of the absolute values returned by ``feature_exposures``.
    """
    exposures = feature_exposures(df, prediction_col=prediction_col)
    magnitudes = np.abs(exposures)
    return magnitudes.max()
def feature_exposure(df, prediction_col=PREDICTION_NAME):
    """Return the root-mean-square feature exposure of the predictions.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the column holding the predictions. It is
            forwarded to ``feature_exposures``, which requires it.

    Returns:
        The square root of the mean squared per-feature exposure.
    """
    exposures = feature_exposures(df, prediction_col=prediction_col)
    return np.sqrt(np.mean(np.square(exposures)))
def _era_bar_color(era_label, era_score):
    """Return the bar colour for one era of the per-era score chart."""
    # Accept both "era121"-style labels and bare era numbers.
    text = str(era_label)
    if text.startswith("era"):
        text = text[len("era"):]
    era = int(text)
    if era_score < 0:
        return "tab:red"
    if era in VAL1_ERAS:
        return "tab:blue"
    if era in VAL2_ERAS:
        return "tab:orange"
    return "tab:gray"


def evaluate_df(df, prediction_col=PREDICTION_NAME, plot_title="Validation"):
    """Plot and print validation metrics for a set of predictions.

    The left panel is a bar chart of the per-era score with a horizontal
    line at the mean. The right panel is a bar chart of the absolute
    feature exposures with a horizontal line at the maximum and a small
    table with the RMS and max exposure. After the figure is shown, the
    mean per-era score and its ratio to the per-era standard deviation are
    printed.

    Args:
        df: DataFrame with an ``"era"`` column, the target column, the
            feature columns and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.
        plot_title: Title of the per-era score panel.

    Returns:
        None.
    """
    score_fn = functools.partial(score, prediction_col=prediction_col)
    era_scores = df.groupby("era").apply(score_fn)
    corr = era_scores.mean()
    sharpe = corr / era_scores.std()

    colors = [
        _era_bar_color(label, value) for label, value in era_scores.items()
    ]

    _, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: per-era scores.
    score_ax = era_scores.plot(kind="bar", legend=False, use_index=True,
                               color=colors,
                               title=plot_title,
                               ax=axes[0])
    mean_line = score_ax.axhline(y=corr,
                                 linewidth=1, color='tab:blue',
                                 label="Mean CORR")
    # Build the legend from explicit patches: the bar container's own handle
    # takes the colour of the first bar, which is not necessarily val1's.
    legend_handles = [
        mean_line,
        mpatches.Patch(color='tab:blue', label='val1'),
        mpatches.Patch(color='tab:orange', label='val2'),
        mpatches.Patch(color='tab:red', label='negative CORR'),
    ]
    score_ax.legend(handles=legend_handles)

    # Right panel: absolute feature exposures.
    fe = feature_exposures(df, prediction_col=prediction_col)
    abs_fe = np.abs(fe)
    max_fe = np.max(abs_fe)
    rms_fe = np.sqrt(np.mean(np.square(fe)))
    exposure_ax = pd.Series(abs_fe).plot(kind="bar", title="Feature exposures",
                                         use_index=False, ax=axes[1],
                                         label="feature exp")
    exposure_ax.axhline(y=max_fe, linewidth=1, color="tab:red",
                        label="max feature exp")
    exposure_ax.legend()
    # One tick per feature is unreadable, so the x axis is hidden.
    exposure_ax.get_xaxis().set_visible(False)
    celltext = [[f"{rms_fe:.4f}"], [f"{max_fe:.4f}"]]
    table.table(exposure_ax, cellText=celltext, rowLabels=["fe", "max fe"])

    plt.show()

    print(f"Val CORR: {corr:.4f}")
    print(f"Val Sharpe: {sharpe:.4f}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.