Skip to content

Instantly share code, notes, and snippets.

# Explain the predictions of one fitted model with SHAP.
# Every column except the 'label' column is treated as a model input.
feature_columns = [col for col in model_data.columns if col != 'label']


def f(x):
    """Return the predictions of the model stored under ``random_model``."""
    return models[random_model].predict(x)


# One row holding the per-column medians of the inputs; it is handed to
# shap.Explainer as its second argument.
med = (
    model_data.loc[:, feature_columns]
    .median()
    .values
    .reshape((1, model_data.shape[1] - 1))
)
explainer = shap.Explainer(f, med)

# Compute SHAP values for the rows selected by the "2018" label, then draw the
# waterfall plot for the entry at position 129.
shap_values = explainer(model_data.loc["2018", feature_columns])
shap.plots.waterfall(shap_values[129])
def Find_Optimal_Cutoff(y_real, y_hat):
""" Find the optimal probability cutoff point for a classification model related to event rate
Parameters
----------
y_real : Matrix with dependent or target data, where rows are observations
y_hat : Matrix with predicted data, where rows are observations
def build_elasticnet():
    """Return a new ``ElasticNet`` instance created with its default arguments."""
    estimator = ElasticNet()
    return estimator
def generate_signal(model_constructor, data, label:str, start_date:str, end_date:str, retrain_window:str="1m", keep_models:bool=False):
# generate date series ---
first = pd.to_datetime(start_date).replace(day=1)
seriesFirstDate = first - datetime.timedelta(days=1)
dates = pd.date_range(seriesFirstDate, end_date, freq = retrain_window)
signal = []
import pandas as pd
from typing import List, Dict, Any
import numpy as np
def compound(r):
    """
    Compound the returns in ``r`` into a single total return.

    The log-growth ``log(1 + r)`` of every entry is summed and the total is
    mapped back with ``exp(x) - 1``.
    """
    log_growth = np.log1p(r)
    return np.expm1(log_growth.sum())
import numpy as np
import pandas as pd
from typing import Dict, List, Any
def rsi_point(values):
    """Return an RSI-style reading for one window of changes.

    ``up`` is the mean of the strictly positive entries of ``values`` and
    ``down`` is the magnitude of the mean of the strictly negative entries;
    the result is ``100 * up / (up + down)``. Zeros count as neither a gain
    nor a loss.

    Parameters
    ----------
    values : object supporting boolean-mask indexing and ``len`` on the
        selection (for example a NumPy array or a pandas Series).

    Returns
    -------
    float
        ``100.0`` when the window holds gains but no losses, ``0.0`` when it
        holds losses but no gains, and NaN when it holds neither.
    """
    gains = values[values > 0]
    losses = values[values < 0]

    # The mean of an empty selection is NaN, which used to turn a one-sided
    # window into NaN. A missing side now contributes 0 instead.
    up = gains.mean() if len(gains) else 0.0
    down = -losses.mean() if len(losses) else 0.0

    total = up + down
    if total == 0:
        # No gains and no losses: the ratio is undefined.
        return float("nan")
    return 100 * up / total
def rsi(df_ohlc, window:int = 14, norm:bool=False):
import pandas as pd
import numpy as np
def below_dd(series, max_dd):
    """Return True if the cumulative product of ``series`` ever sits more than ``max_dd`` below 1."""
    drawdown = 1 - series.cumprod()
    breached = drawdown > max_dd
    return bool(breached.any())
def over_rt(series, min_rt):
    """Return True if the cumulative product of ``series`` ever reaches ``1 + min_rt``."""
    growth = series.cumprod()
    target = 1 + min_rt
    reached = growth >= target
    return bool(reached.any())
def compute_triple_barrier(_, max_dd, min_rt, hz, label_type : str = None, debug_mode = False):
import pandas as pd
from pandas_datareader import data as pdr
import numpy as np
import yfinance as yf
# Download price history for one instrument through pandas-datareader.
# NOTE(review): pdr_override() presumably patches pandas-datareader's Yahoo
# readers so they are served by yfinance; it is reported as deprecated in
# recent yfinance releases — confirm against the installed version.
yf.pdr_override()
# Symbol passed to the download call (presumably the S&P 500 index on Yahoo — confirm).
instrument = '^GSPC'
# No date range is passed, so the library's default range applies.
data = pdr.get_data_yahoo(instrument)
# Bare expression: its value is shown only in an interactive session or
# notebook; in a plain script the result is discarded.
data.tail()
def create_knapsack(df, maxPrice: float = 50.0, maxWines: int = 12):
model = pyo.ConcreteModel()
Wines = df.wine.values.tolist()
Types = df.type.values.tolist()
Regions = df.region.values.tolist()
Prices = df.price.values.tolist()
Ratings = df.rating.values.tolist()
import cloudpickle
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm import tqdm
import re
# Load model and data -----
# The pickle file is opened in a `with` block so the handle is closed as soon
# as loading finishes instead of being left open.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('./embedding_dictionary_holdout.pkl', 'rb') as embeddings_file:
    holdout_embeddings = cloudpickle.load(embeddings_file)
# Saved Keras model read from the './cover_ranker' path.
model = tf.keras.models.load_model('./cover_ranker')
import pandas as pd
import cloudpickle
import pickle
import re
import jsonlines
from tqdm import tqdm
import tensorflow as tf
# Define model architecture ---------------------------
# A LeakyReLU layer instance created with the layer's default arguments and
# bound to `activation`; presumably reused by the layers defined further down
# (not visible in this excerpt).
activation = tf.keras.layers.LeakyReLU()