This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def boruta_shap_algorithm(X, y, trials=20, workers=2, significance_level=0.05, seed=2024): | |
# Set the seed | |
np.random.seed(seed) | |
# Assert that the number of samples of both data match | |
assert X.shape[0] == y.shape[0], "X and y dimensions don't coincide" | |
# Set a dictionary to save the number of hits for each feature |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# select features from n number of trials | |
def choose_features(feature_hits, TRIALS, thresh): | |
# Define the boundaries for the green zone | |
# Define the green zone threshold | |
green_zone_thresh = TRIALS - thresh | |
# Define the blue zone upper threshold | |
blue_zone_upper = green_zone_thresh | |
# Define the blue zone lower threshold | |
blue_zone_lower = thresh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set the minimum number of trials as a threshold number to accept an input feature as a selected feature | |
def get_tail_items(pmf, significance_level=0.05): | |
# Set total to zero | |
total = 0 | |
# Create a loop based on the probability mass function | |
for i, x in enumerate(pmf): | |
# Increment the total variable with the probability “x” of i | |
total += x | |
# If total is higher than the significance level | |
if total >= significance_level: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the necessary libraries | |
import scipy as sp | |
import numpy as np | |
import pandas as pd | |
import shap | |
from xgboost import XGBRFClassifier | |
from xgboost import XGBRFRegressor | |
from sklearn.preprocessing import LabelEncoder | |
from concurrent import futures |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count how many rows carry each label value in the 'y' column
data['y'].value_counts()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Label every row with triple_barrier_method and store the result in the
# 'y' column (holding period of 10, upper/lower multipliers of 2 and 1)
data['y'] = triple_barrier_method(data, holding_period=10, upper_lower_multipliers=[2, 1])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the Apple (AAPL) price history with auto-adjusted prices
data = yf.download('AAPL', start='1990-01-01', end='2024-04-04', auto_adjust=True)
# Compute the daily volatility and store it in the 'vol' column
data['vol'] = get_Daily_Volatility(data)
# Drop, in place, the rows that have NaN values
data.dropna(inplace=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def triple_barrier_method(df, holding_period=10, upper_lower_multipliers=[2, 1]): | |
# Set the close price as a cupy array | |
close = cp.array(df['Close'].values, dtype=cp.float64) | |
# Set the high price as a cupy array | |
high = cp.array(df['High'].values, dtype=cp.float64) | |
# Set the low price as a cupy array | |
low = cp.array(df['Low'].values, dtype=cp.float64) | |
# Set the daily volatility as a cupy array | |
daily_volatility = cp.array(df['vol'].values, dtype=cp.float64) | |
# Set the barriers empty array as a cupy array |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@cuda.jit | |
def triple_barrier_method_cuda(close, high, low, daily_volatility, upper_lower_multipliers, holding_period, out): | |
# Set the number days passed to zero | |
days_passed = 0 | |
# Set the vertical barrier initial value to NaN | |
vert_barrier = math.nan | |
# Set the top barrier initial value to NaN | |
top_barrier = math.nan | |
# Set the bottom barrier initial value to NaN |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_Daily_Volatility(df, span0=20):
    """Estimate volatility as the EWM standard deviation of close-to-close returns.

    Args:
        df: DataFrame that has a ``Close`` price column.
        span0: Span of the exponentially weighted window (default 20).

    Returns:
        The exponentially weighted standard deviation of the simple
        percentage returns of ``df['Close']``, rounded to six decimals and
        indexed like ``df``. The input frame is not modified.
    """
    # Simple percentage returns; the first row has no previous close to
    # compare against, so its return is NaN.
    returns = df['Close'].pct_change()
    # Exponentially weighted standard deviation using the recursive
    # (adjust=False) weighting scheme.
    volatility = returns.ewm(span=span0, adjust=False).std()
    # Round the values to six decimals
    return volatility.round(6)
NewerOlder