Tushar Tiwari mrtushartiwari

## counter_barplot.py
# Bar plot of how often each value occurs in `counts_categorical`, with the
# count printed on top of every bar.
# Annotation approach taken from https://stackoverflow.com/a/59840744/5305748
from collections import Counter

class_counts = Counter(counts_categorical)
k = pd.DataFrame({
    'number_of_classes': list(class_counts.keys()),
    'counts': list(class_counts.values()),
})
sns.set(rc={'figure.figsize': (11.0, 8.0)})
g = sns.barplot(x='number_of_classes', y='counts', data=k)

for bar in g.patches:
    bar_height = bar.get_height()
    # The label sits at the top of the bar, 0.3 to the right of its left edge.
    g.annotate('{:.0f}'.format(bar_height), (bar.get_x() + 0.3, bar_height))

## Boxplot_of_categorical.py
# Side-by-side box plots of `loss` grouped by three categorical columns,
# one column per subplot.
# ref: https://seaborn.pydata.org/tutorial/categorical.html
fig, axs = plt.subplots(1, 3, figsize=(22, 10))

for panel, column in zip(axs, ("cat80", "cat79", "cat87")):
    sns.boxplot(x=column, y="loss", data=train_data, ax=panel)

## featurization_allstate.py
def encode_continous(df, continous_features):
    """Add log1p, square-root, square and log2 versions of the given columns.

    For every name in ``continous_features`` four new columns are written
    into ``df`` in place: ``<col>_log``, ``<col>_squareroot``,
    ``<col>_square`` and ``<col>_log2``. Nothing is returned.

    Args:
        df: DataFrame that holds the source columns; it is modified in place.
        continous_features: Iterable of column names present in ``df``.
    """
    for col in continous_features:
        values = df[col]
        df[col + '_log'] = np.log1p(values)  # log(1 + x)
        df[col + '_squareroot'] = np.sqrt(values)
        df[col + '_square'] = np.square(values)
        # Plain log2 (no +1), so a value of 0 maps to -inf.
        df[col + '_log2'] = np.log2(values)

def encode_category(df,categorical_features):
    """Walk the listed categorical columns of ``df`` and sort each one's unique values.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; the sorted values are computed but not used or returned in the
    lines shown here.
    """
    for col in categorical_features:
        # Distinct values of the column, in ascending order.
        unique_classes = sorted(df[col].unique())

## Base_line.py
# Baseline: use the median of the training target as the prediction for every
# test row, then report the mean absolute error after exponentiating both the
# prediction and the test target.
# NOTE(review): SHIFT is subtracted inside np.exp on both sides; confirm this
# matches how the target was transformed.
n_test_rows = len(y_test)
median_train_loss = [y_train.median()] * n_test_rows
list_shift = [SHIFT] * n_test_rows

baseline_prediction = np.exp(np.array(median_train_loss) - np.array(list_shift))
actual = np.exp(y_test - SHIFT)
baseline_mae = mean_absolute_error(baseline_prediction, actual)
print('Mean absolute error on test data for baseline mode ' + str(baseline_mae))

## listofmodels.csv

          
            Model used
             MAE

            
              Linear model Ridge with alpha (0.01)
              1282.15

            
              Linear model Lasso  with alpha (0.01)
               1342.75

            
              Decision Tree Regressor with (max_depth= 12  max_features= n_features  min_samples_leaf= 27)
              1272.19

            
              Ada Boost Regressor with (n_estimators=100  learning_rate=0.0001)
               1342.750

            
              RandomForest Regressor with n_estimators=150  max_features = 100  max_depth = 11
               1342.750

            
              Custom Ensemble model with number of base estimator = 100 
               1209.61

## custom_ensemble.py
def custom_estimator(X_train,y_train, X_test,n_estimators):
	X_train_param = X_train.copy()
	y_train_param = y_train.copy()
    X_test_param = X_test.copy()
    D1_train,D2_train,D1_label,D2_label, =  train_test_split(X_train_param,y_train_param, test_size=0.5 , random_state=42)
    # Combine D1_train and D1_label then sample
    D1_train = D1_train.assign(loss = D1_label )
    base_models = []

    for i in tqdm(range(n_estimators)):

## FandO.csv

          
            Underlying
            Symbol

            
              Nifty 50
              NIFTY

            
              Nifty Bank
              BANKNIFTY

            
              Nifty Financial Services
              FINNIFTY

            
              AARTI INDUSTRIES LIMITED
              AARTIIND

            
              ACC LIMITED
              ACC

            
              ADANI ENTERPRISES LIMITED
              ADANIENT

            
              ADANI PORTS AND SPECIAL ECONOMIC ZONE LIMITED
              ADANIPORTS

            
              Alkem Laboratories Limited
              ALKEM

            
              AMARA RAJA BATTERIES LIMITED
              AMARAJABAT

## fetch_data.py
def fetch_data(symbol, start='2010-01-01', end='2021-06-30', output_dir="stock_data"):
    """Download price data for one symbol and save it as CSV.

    Saves the open, low, high, close, volume and adjusted-close data that
    ``pdr.DataReader`` returns from the 'yahoo' source for ``symbol`` with
    the ".NS" suffix appended. The file is written to
    ``<output_dir>/<symbol>`` (no extension is added, as before).

    Args:
        symbol: Ticker symbol without the ".NS" suffix.
        start: First date to request; defaults to the original fixed value.
        end: Last date to request; defaults to the original fixed value.
        output_dir: Directory to write into; created if it does not exist.
    """
    import os  # local import so the function does not rely on file-level imports

    # Create the target directory up front so to_csv cannot fail on a missing path.
    os.makedirs(output_dir, exist_ok=True)
    df = pdr.DataReader(symbol + ".NS", 'yahoo', start=start, end=end)
    df.to_csv(os.path.join(output_dir, symbol))

## selection_stocks.py
# Read the F&O underlyings file and collect its `Symbol` column as a list.
f_and_o = pd.read_csv("FandO.csv")
stock_selected = list(f_and_o['Symbol'].values)

# Drop the index contracts so that only the remaining symbols are downloaded.
for index_symbol in ('NIFTY', 'BANKNIFTY', 'FINNIFTY'):
    stock_selected.remove(index_symbol)

## fetching_stock_prices.py
# Download every selected symbol in turn, then report how many files exist.
for ticker in stock_selected:
    fetch_data(symbol=ticker)
    time.sleep(10)  # fixed 10-second pause before the next download
    print(ticker)  # progress indicator

downloaded_files = os.listdir("stock_data")
print(len(downloaded_files))  # the original run reported 156 stocks downloaded
	# Bar plot of how often each value occurs in `counts_categorical`, with the
	# count printed on top of every bar.
	# ref: https://stackoverflow.com/a/59840744/5305748 for annotating the bar graph
	from collections import Counter
	k = Counter(counts_categorical)
	k = dict(k)
	k = pd.DataFrame({'number_of_classes': list(k.keys()), 'counts': list(k.values())})
	sns.set(rc={'figure.figsize': (11.0, 8.0)})
	g = sns.barplot(x='number_of_classes', y='counts', data=k)

	for p in g.patches:
	    # Loop body re-indented; the label sits at the top of each bar.
	    g.annotate('{:.0f}'.format(p.get_height()), (p.get_x() + 0.3, p.get_height()))
	# Side-by-side box plots of `loss` grouped by three categorical columns,
	# one column per subplot.
	# ref: https://seaborn.pydata.org/tutorial/categorical.html
	fig, axs = plt.subplots(1, 3, figsize=(22, 10))

	for panel, column in zip(axs, ("cat80", "cat79", "cat87")):
	    sns.boxplot(x=column, y="loss", data=train_data, ax=panel)
	def encode_continous(df, continous_features):
	    """Add log1p, square-root, square and log2 versions of the given columns.

	    For every name in ``continous_features`` four new columns are written
	    into ``df`` in place: ``<col>_log``, ``<col>_squareroot``,
	    ``<col>_square`` and ``<col>_log2``. Nothing is returned.

	    Args:
	        df: DataFrame that holds the source columns; it is modified in place.
	        continous_features: Iterable of column names present in ``df``.
	    """
	    for col in continous_features:
	        df[col + '_log'] = np.log1p(df[col])  # log(1 + x)
	        df[col + '_squareroot'] = np.sqrt(df[col])
	        df[col + '_square'] = np.square(df[col])
	        # Plain log2 (no +1), so a value of 0 maps to -inf.
	        df[col + '_log2'] = np.log2(df[col])

	def encode_category(df, categorical_features):
	    """Walk the listed categorical columns of ``df`` and sort each one's unique values.

	    NOTE(review): only the beginning of this function is visible in this
	    excerpt; the sorted values are computed but not used or returned in
	    the lines shown here. Nesting was restored; the logic is unchanged.
	    """
	    for col in categorical_features:
	        # Distinct values of the column, in ascending order.
	        unique_classes = sorted(df[col].unique())
	# Baseline: use the median of the training target as the prediction for
	# every test row, then report the mean absolute error after exponentiating
	# both the prediction and the test target.
	# NOTE(review): SHIFT is subtracted inside np.exp on both sides; confirm
	# this matches how the target was transformed.
	n_test_rows = len(y_test)
	median_train_loss = [y_train.median()] * n_test_rows
	list_shift = [SHIFT] * n_test_rows
	baseline_prediction = np.exp(np.array(median_train_loss) - np.array(list_shift))
	actual = np.exp(y_test - SHIFT)
	baseline_mae = mean_absolute_error(baseline_prediction, actual)
	print('Mean absolute error on test data for baseline mode ' + str(baseline_mae))
	Model used	MAE
	Linear model Ridge with alpha (0.01)	1282.15
	Linear model Lasso with alpha (0.01)	1342.75
	Decision Tree Regressor with (max_depth= 12 max_features= n_features min_samples_leaf= 27)	1272.19
	Ada Boost Regressor with (n_estimators=100 learning_rate=0.0001)	1342.750
	RandomForest Regressor with n_estimators=150 max_features = 100 max_depth = 11	1342.750
	Custom Ensemble model with number of base estimator = 100	1209.61
	def custom_estimator(X_train,y_train, X_test,n_estimators):
	X_train_param = X_train.copy()
	y_train_param = y_train.copy()
	X_test_param = X_test.copy()
	D1_train,D2_train,D1_label,D2_label, = train_test_split(X_train_param,y_train_param, test_size=0.5 , random_state=42)
	# Combine D1_train and D1_label then sample
	D1_train = D1_train.assign(loss = D1_label )
	base_models = []

	for i in tqdm(range(n_estimators)):
	Underlying	Symbol
	Nifty 50	NIFTY
	Nifty Bank	BANKNIFTY
	Nifty Financial Services	FINNIFTY
	AARTI INDUSTRIES LIMITED	AARTIIND
	ACC LIMITED	ACC
	ADANI ENTERPRISES LIMITED	ADANIENT
	ADANI PORTS AND SPECIAL ECONOMIC ZONE LIMITED	ADANIPORTS
	Alkem Laboratories Limited	ALKEM
	AMARA RAJA BATTERIES LIMITED	AMARAJABAT
	def fetch_data(symbol, start='2010-01-01', end='2021-06-30', output_dir="stock_data"):
	    """Download price data for one symbol and save it as CSV.

	    Saves the open, low, high, close, volume and adjusted-close data that
	    ``pdr.DataReader`` returns from the 'yahoo' source for ``symbol`` with
	    the ".NS" suffix appended. The file is written to
	    ``<output_dir>/<symbol>`` (no extension is added, as before).

	    Args:
	        symbol: Ticker symbol without the ".NS" suffix.
	        start: First date to request; defaults to the original fixed value.
	        end: Last date to request; defaults to the original fixed value.
	        output_dir: Directory to write into; created if it does not exist.
	    """
	    import os  # local import so the function does not rely on file-level imports

	    # Create the target directory up front so to_csv cannot fail on a missing path.
	    os.makedirs(output_dir, exist_ok=True)
	    df = pdr.DataReader(symbol + ".NS", 'yahoo', start=start, end=end)
	    df.to_csv(os.path.join(output_dir, symbol))
	# Read the F&O underlyings file and collect its `Symbol` column as a list.
	f_and_o = pd.read_csv("FandO.csv")

	stock_selected = []
	for i in f_and_o['Symbol'].values:
	    # Loop body re-indented so the append actually runs per symbol.
	    stock_selected.append(i)

	# Removing the index future and option contracts
	stock_selected.remove('NIFTY')
	stock_selected.remove('BANKNIFTY')
	stock_selected.remove('FINNIFTY')
	# Download every selected symbol in turn, then report how many files exist.
	for i in stock_selected:
	    # Loop body re-indented: download, pause, then print the symbol.
	    fetch_data(symbol=i)
	    time.sleep(10)  # fixed 10-second pause before the next download
	    print(i)
	print(len(os.listdir("stock_data")))  # 156 stocks data downloaded