Skip to content

Instantly share code, notes, and snippets.

# Presumably the entry point of a Boruta-style feature selection driven by SHAP values
# (inferred from the name only — TODO confirm against the full source).
# NOTE(review): only the input check is visible in this excerpt; the statements
# announced by the other comments (seeding, hit dictionary, feature selection)
# are not shown here, so `trials`, `workers`, `significance_level` and `seed`
# are unused in the visible part.
def boruta_shap_algorithm(X, y, trials=20, workers=2, significance_level=0.05, seed=2024):
# Set the seed
# Assert that the number of samples of both data match
# (X and y must have the same length along their first axis; otherwise an
# AssertionError with the message below is raised)
assert X.shape[0] == y.shape[0], "X and y dimensions don't coincide"
# Set a dictionary to save the number of hits for each feature
# select features from n number of trials
# Derive the hit-count thresholds used to sort features into zones.
# NOTE(review): only the threshold setup is visible in this excerpt; how
# `feature_hits` is compared against these thresholds, and what is returned,
# is not shown — confirm against the full source.
def choose_features(feature_hits, TRIALS, thresh):
# Define the boundaries for the green zone
# Define the green zone threshold
# (TRIALS minus thresh hits)
green_zone_thresh = TRIALS - thresh
# Define the blue zone upper threshold
# (same value as the green zone threshold, so the two zones meet there)
blue_zone_upper = green_zone_thresh
# Define the blue zone lower threshold
blue_zone_lower = thresh
# Set the minimum number of trials as a threshold number to accept an input feature as a selected feature
def get_tail_items(pmf, significance_level=0.05):
    """Return the first index at which the cumulative mass reaches the level.

    Walks ``pmf`` from index 0, accumulating probabilities, and stops at the
    first index whose running total is greater than or equal to
    ``significance_level``. If the total never reaches the level, the last
    index is returned.

    Args:
        pmf: Iterable of probabilities, ordered by outcome index.
        significance_level: Cumulative probability that ends the tail.

    Returns:
        The zero-based index where the accumulation stopped.

    Raises:
        ValueError: If ``pmf`` yields no values.
    """
    # Running sum of the probability mass seen so far
    cumulative = 0
    # Stays None only when the iterable is empty
    tail_index = None
    for tail_index, probability in enumerate(pmf):
        # Increment the running total with the probability of this index
        cumulative += probability
        # Stop as soon as the accumulated mass reaches the significance level
        if cumulative >= significance_level:
            break
    if tail_index is None:
        raise ValueError("pmf must contain at least one probability")
    return tail_index
# Import the necessary libraries
import scipy as sp
import numpy as np
import pandas as pd
import shap
from xgboost import XGBRFClassifier
from xgboost import XGBRFRegressor
from sklearn.preprocessing import LabelEncoder
from concurrent import futures
# NOTE(review): the statements below appear out of order in this excerpt —
# `data` is used before it is assigned, and triple_barrier_method reads the
# 'vol' column that is only created further down. Confirm the intended order
# (download -> volatility -> labels) against the full source.
# Obtain the prediction feature with the above function
data['y'] = triple_barrier_method(data, holding_period=10, upper_lower_multipliers=[2, 1])
# Import the Apple dataframe
# NOTE(review): this line is truncated — the callee and its opening parenthesis
# are missing before 'AAPL', so it is not valid Python as written. The keyword
# arguments (start, end, auto_adjust) suggest a price-download call; restore
# the original call from the full source.
data ='AAPL', start='1990-01-01', end='2024-04-04', auto_adjust=True)
# Compute the daily volatility
data['vol'] = get_Daily_Volatility(data)
# Drop the rows that have NaN values
# NOTE(review): the statement that performs the drop is not visible in this excerpt.
# Prepare the price and volatility columns of `df` as float64 arrays for the
# triple-barrier computation.
# Reads the 'Close', 'High', 'Low' and 'vol' columns of `df`.
# NOTE(review): only the input conversion is visible in this excerpt; the
# barrier computation and the return value are not shown.
# NOTE(review): `upper_lower_multipliers` defaults to a list, which Python
# shares across calls — safe only as long as the body never mutates it.
# NOTE(review): `cp` is presumably cupy (per the comments below); its import
# is not among the imports visible in this excerpt.
def triple_barrier_method(df, holding_period=10, upper_lower_multipliers=[2, 1]):
# Set the close price as a cupy array
close = cp.array(df['Close'].values, dtype=cp.float64)
# Set the high price as a cupy array
high = cp.array(df['High'].values, dtype=cp.float64)
# Set the low price as a cupy array
low = cp.array(df['Low'].values, dtype=cp.float64)
# Set the daily volatility as a cupy array
daily_volatility = cp.array(df['vol'].values, dtype=cp.float64)
# Set the barriers empty array as a cupy array
# Presumably the per-element kernel behind triple_barrier_method (inferred
# from the name and the matching parameters — TODO confirm).
# NOTE(review): only the initialisation of three local variables is visible in
# this excerpt; how the inputs are read and how `out` is written is not shown.
# NOTE(review): `math` is used below but its import is not among the imports
# visible in this excerpt.
def triple_barrier_method_cuda(close, high, low, daily_volatility, upper_lower_multipliers, holding_period, out):
# Set the number days passed to zero
days_passed = 0
# Set the vertical barrier initial value to NaN
vert_barrier = math.nan
# Set the top barrier initial value to NaN
top_barrier = math.nan
# Set the bottom barrier initial value to NaN
def get_Daily_Volatility(df, span0=20):
    """Return the exponentially weighted volatility of daily close returns.

    Args:
        df: DataFrame with a 'Close' price column (the same column the
            sibling triple_barrier_method reads from this frame).
        span0: Span of the exponentially weighted window, in rows.

    Returns:
        A Series aligned with ``df``, rounded to six decimals. The leading
        entries are NaN because a standard deviation needs at least two
        returns.
    """
    # simple percentage returns
    # (the original read `df0` here without ever assigning it)
    returns = df['Close'].pct_change()
    # 20 days, a month EWM's std as boundary
    volatility = returns.ewm(span=span0, adjust=False).std()
    # Round the column values to six decimals
    return volatility.round(6)