Skip to content

Instantly share code, notes, and snippets.

@jeethu
Created August 25, 2020 17:01
Numerai Model Evaluation
import functools
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.table as table
import numpy as np
import pandas as pd
from scipy.stats import spearmanr
# Tournament identifier; the target and prediction column names derive from it.
TOURNAMENT_NAME = "kazutsugi"
TARGET_NAME = f"target_{TOURNAMENT_NAME}"
PREDICTION_NAME = f"prediction_{TOURNAMENT_NAME}"

# Era numbers 121..132 (drawn as "val1" by evaluate_df).
VAL1_ERAS = tuple(range(121, 133))
# Era numbers 197..206 (drawn as "val2" by evaluate_df).
VAL2_ERAS = tuple(range(197, 207))
# Both era groups combined, val1 first.
VAL_ERAS = (*VAL1_ERAS, *VAL2_ERAS)
def score(df, prediction_col):
    """Return the correlation between the target and the ranked predictions.

    The values in ``df[prediction_col]`` are converted to percentile ranks
    (``method="first"``, so ties are ranked in order of appearance) and then
    correlated with ``df[TARGET_NAME]`` using ``np.corrcoef``.

    Args:
        df: DataFrame containing the ``TARGET_NAME`` column and
            ``prediction_col``.
        prediction_col: Name of the column holding the predictions.

    Returns:
        The off-diagonal entry of the 2x2 correlation matrix.
    """
    targets = df[TARGET_NAME]
    ranked_predictions = df[prediction_col].rank(pct=True, method="first")
    correlation_matrix = np.corrcoef(targets, ranked_predictions)
    # Entry [0, 1] is the correlation between the two inputs.
    return correlation_matrix[0, 1]
def feature_exposures(df, prediction_col):
    """Return the Spearman correlation of the predictions with each feature.

    A feature is any column of ``df`` whose name starts with ``"feature"``;
    the result follows the order of those columns in ``df.columns``.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.

    Returns:
        A NumPy array with one correlation coefficient per feature column.
    """
    predictions = df[prediction_col]
    feature_columns = (name for name in df.columns if name.startswith("feature"))
    # spearmanr returns (correlation, p-value); only the correlation is kept.
    return np.array(
        [spearmanr(predictions, df[name])[0] for name in feature_columns]
    )
def max_feature_exposure(df, prediction_col=PREDICTION_NAME):
    """Return the largest absolute feature exposure of the predictions.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the prediction column; defaults to
            ``PREDICTION_NAME``.

    Returns:
        The maximum of the absolute values returned by ``feature_exposures``.
    """
    exposures = feature_exposures(df, prediction_col=prediction_col)
    absolute_exposures = np.abs(exposures)
    return np.max(absolute_exposures)
def feature_exposure(df, prediction_col=PREDICTION_NAME):
    """Return the root-mean-square feature exposure of the predictions.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the prediction column; defaults to
            ``PREDICTION_NAME``, matching ``max_feature_exposure``.

    Returns:
        The square root of the mean squared value of the per-feature
        exposures returned by ``feature_exposures``.
    """
    # feature_exposures requires the prediction column; calling it with only
    # ``df`` raised a TypeError, so the column is forwarded explicitly.
    exposures = feature_exposures(df, prediction_col=prediction_col)
    return np.sqrt(np.mean(np.square(exposures)))
# Bar colour -> legend label, in the order the entries appear in the legend.
_ERA_BAR_LEGEND = (
    ("tab:blue", "val1"),
    ("tab:orange", "val2"),
    ("tab:gray", "other"),
    ("tab:red", "negative CORR"),
)


def _era_number(era_label):
    """Return the integer era number for a label such as ``"era121"`` or ``121``."""
    if isinstance(era_label, str) and era_label.startswith("era"):
        era_label = era_label[len("era"):]
    return int(era_label)


def _era_bar_color(era, era_score):
    """Return the bar colour for one era; a negative score takes precedence."""
    if era_score < 0:
        return "tab:red"
    if era in VAL1_ERAS:
        return "tab:blue"
    if era in VAL2_ERAS:
        return "tab:orange"
    return "tab:gray"


def evaluate_df(df, prediction_col=PREDICTION_NAME, plot_title="Validation"):
    """Plot per-era scores and feature exposures, then print summary metrics.

    The left panel is a bar chart of ``score`` computed per ``"era"`` group,
    with a horizontal line at the mean. Bars are red for negative scores,
    blue for eras in ``VAL1_ERAS``, orange for eras in ``VAL2_ERAS`` and gray
    otherwise. The right panel is a bar chart of the absolute feature
    exposures with a line at their maximum and a small table showing the
    root-mean-square and maximum exposure.

    After showing the figure, the mean per-era score and that mean divided by
    the per-era standard deviation are printed.

    Args:
        df: DataFrame with an ``"era"`` column, the feature columns, the
            target column used by ``score`` and ``prediction_col``.
        prediction_col: Name of the prediction column; defaults to
            ``PREDICTION_NAME``.
        plot_title: Title of the per-era score panel.

    Returns:
        None.
    """
    score_fn = functools.partial(score, prediction_col=prediction_col)
    era_scores = df.groupby("era").apply(score_fn)

    colors = [
        _era_bar_color(_era_number(era_label), era_score)
        for era_label, era_score in era_scores.items()
    ]

    _, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: per-era scores.
    score_ax = era_scores.plot(
        kind="bar",
        legend=False,
        use_index=True,
        color=colors,
        title=plot_title,
        ax=axes[0],
    )
    mean_line = score_ax.axhline(
        y=era_scores.mean(), linewidth=1, color="tab:blue", label="Mean CORR"
    )
    # Use explicit proxy patches: a single label on the bar container would
    # take its legend colour from the first bar, which may be a red
    # (negative) bar, and would leave the other colours unexplained.
    legend_handles = [
        mpatches.Patch(color=color, label=label)
        for color, label in _ERA_BAR_LEGEND
        if color in colors
    ]
    legend_handles.append(mean_line)
    score_ax.legend(handles=legend_handles)

    # Right panel: absolute feature exposures.
    fe = feature_exposures(df, prediction_col=prediction_col)
    max_fe = np.max(np.abs(fe))
    rms_fe = np.sqrt(np.mean(np.square(fe)))
    abs_fe = pd.Series(np.abs(fe))
    fe_ax = abs_fe.plot(
        kind="bar",
        title="Feature exposures",
        use_index=False,
        ax=axes[1],
        label="feature exp",
    )
    fe_ax.axhline(
        y=abs_fe.max(), linewidth=1, color="tab:red", label="max feature exp"
    )
    fe_ax.legend()
    # One bar per feature: hide the x axis rather than draw every tick label.
    fe_ax.axes.get_xaxis().set_visible(False)
    celltext = [[f"{rms_fe:.4f}"], [f"{max_fe:.4f}"]]
    table.table(fe_ax, cellText=celltext, rowLabels=["fe", "max fe"])
    plt.show()

    corr = era_scores.mean()
    sharpe = corr / era_scores.std()
    print(f"Val CORR: {corr:.4f}")
    print(f"Val Sharpe: {sharpe:.4f}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment