# Mehdi Mehdi-Amine

## three_models.py
# Build three classifiers, each one based on a different learning algorithm.
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Every model is constructed with the same fixed seed (random_state=42).
tree_clf = DecisionTreeClassifier(random_state=42)
log_clf = LogisticRegression(random_state=42, solver="lbfgs")
svm_clf = SVC(random_state=42, gamma="scale")

# Training, predicting, then evaluating the predictions

## ensemble_hard_voting.py
# Combining the three models into an ensemble
from sklearn.ensemble import VotingClassifier
# accuracy_score is called at the end of this snippet, so it has to be imported here.
from sklearn.metrics import accuracy_score

# The ensemble is a voting classifier that aggregates our three models
# (svm_clf, tree_clf and log_clf must already exist when this runs).
voting_clf = VotingClassifier(estimators=[('svm', svm_clf), ('tree', tree_clf), ('log', log_clf)],
                              voting='hard')

# X_train, X_test, y_train and y_test must be defined before this point.
voting_clf.fit(X_train, y_train) # training
y_pred_voting = voting_clf.predict(X_test) # predicting
# NOTE: the score is a bare expression, so it is only displayed in an
# interactive session; wrap it in print() when running this as a script.
accuracy_score(y_test, y_pred_voting) # evaluating

## soft_voting.py
# Soft voting relies on each model reporting a confidence for its prediction.
# Of the three models, only SVC has to be reconfigured to do so,
# which is what probability=True is for.
svm_clf_tweaked = SVC(random_state=42, probability=True, gamma='scale')

soft_voting_clf = VotingClassifier(
    voting='soft',
    estimators=[
        ('svm', svm_clf_tweaked),
        ('tree', tree_clf),
        ('log', log_clf),
    ],
)

soft_voting_clf.fit(X_train, y_train)  # training
# Note: this rebinds y_pred_voting, the name also used for the hard-voting predictions.
y_pred_voting = soft_voting_clf.predict(X_test)  # predicting

## data_split.py
from sklearn.model_selection import train_test_split

# Split (X, y) into a training part and a test part with a fixed random_state.
# X and y must be defined before this line runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42)

## bagging.py
# Bagging creates several models that rely on the same algorithm.
# The training of each model uses a different subset of data sampled randomly from the training set.
# By default Bagging uses soft voting when its base estimator can provide its measure of confidence,
# hence the SVC model is set to have probability=True.
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier

# NOTE(review): the original snippet was cut off after n_estimators, leaving the
# call unclosed. It is closed here with only the arguments that were visible;
# any further arguments and the fit/predict steps are missing - confirm against
# the original snippet.
bagging_clf = BaggingClassifier(
    SVC(gamma='scale', probability=True, random_state=42),
    bootstrap=True,  # set to False to use Pasting instead of Bagging
    n_estimators=100,  # number of SVC models to create
)

## sgdreg-sklearn.py
from sklearn.linear_model import SGDRegressor

# Linear regression trained with SGD: 100 iterations, no tolerance-based
# criterion (tol=None), no penalty term, constant learning rate of 0.01.
sgd_reg = SGDRegressor(
    max_iter=100,
    tol=None,
    penalty=None,
    eta0=0.01,
    learning_rate='constant',
)
# dx_train / dy_train must be defined before this point.
sgd_reg.fit(dx_train, dy_train)

# Report the fitted intercept (rounded to 2 decimals) and the coefficients.
print(f"Optimized Parameters: \nBias: {round(sgd_reg.intercept_[0], 2)}, Weights: {sgd_reg.coef_}")
'''
Out:
Optimized Parameters:
Bias: 0.0, Weights: [1.  0.5]
'''

## normalization-sklearn.py
# Min-max normalization of the training features with Scikit-Learn.
from sklearn.preprocessing import MinMaxScaler
norm_scaler = MinMaxScaler()
# fit_transform fits the scaler on dx_train and returns the scaled data in one call.
dx_train_normalized_sklearn = norm_scaler.fit_transform(dx_train)

# Printing the first 5 rows
print(f"Training data normalized using Scikit-Learn: \n{dx_train_normalized_sklearn[:5]}")

# NOTE(review): the recorded output below was cut off in the original snippet and
# its string was never closed; it is terminated here so the code after it is
# no longer swallowed by the string. The printed rows themselves are missing.
'''
Out:
Training data normalized using Scikit-Learn:
'''

## standardization-sklearn.py
# Standardization of the training features with Scikit-Learn.
from sklearn.preprocessing import StandardScaler

std_scaler = StandardScaler()
# fit_transform fits the scaler on dx_train and returns the scaled data in one call.
dx_train_standardized_sklearn = std_scaler.fit_transform(dx_train)

# Printing the first 5 rows
print(f"Training data standardized using Scikit-Learn: \n{dx_train_standardized_sklearn[:5]}")

# NOTE(review): the recorded output below was cut off in the original snippet and
# its string was never closed; it is terminated here so the code after it is
# no longer swallowed by the string. The printed rows themselves are missing.
'''
Out:
'''

## plot-sgds.py
# 3D line plot of two parameter trajectories: one array per scaling variant,
# with columns 0, 1 and 2 used as the x, y and z coordinates.
import plotly.graph_objects as go

fig = go.Figure()
# params must be defined before this point.
fig.add_trace(go.Scatter3d(x=params[:, 0], y=params[:, 1], z=params[:, 2],
                           mode='lines',
                           name='No Scaling',
                           line=dict(color='green', width=2)))
# NOTE(review): the original snippet was cut off after name='Standardization',
# leaving this call unclosed. It is closed here with only the arguments that
# were visible; the line styling for this trace and whatever followed
# (e.g. displaying the figure) are missing - confirm against the original.
fig.add_trace(go.Scatter3d(x=params_std[:, 0], y=params_std[:, 1], z=params_std[:, 2],
                           mode='lines',
                           name='Standardization'))

## random-params.py
# Random initial parameters: one (1, 1) bias array and one (input_length, 1) weights array.
# The two draws are kept in this order because they consume the global NumPy
# random state one after the other.
import numpy as np
bias = np.random.randn(1, 1)
weights = np.random.randn(input_length, 1) # for some integer input_length
	# NOTE(review): everything from here to the end repeats the snippets above
	# (three models ... random params) with tab indentation and without the
	# "## name.py" headers, including the same truncated calls and unterminated
	# output strings. This looks like a paste/extraction artifact - confirm
	# whether it should be removed.
	# Creating three models with three different algorithms
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.svm import SVC

	tree_clf = DecisionTreeClassifier(random_state=42)
	log_clf = LogisticRegression(solver="lbfgs", random_state=42)
	svm_clf = SVC(gamma="scale", random_state=42)

	# Training, predicting, then evaluating the predictions
	# Combining the three models into an ensemble
	from sklearn.ensemble import VotingClassifier

	# The ensemble is a voting classifier that aggregates our three models
	voting_clf = VotingClassifier(estimators=[('svm', svm_clf), ('tree', tree_clf), ('log', log_clf)],
	voting='hard')

	voting_clf.fit(X_train, y_train) # training
	y_pred_voting = voting_clf.predict(X_test) # predicting
	accuracy_score(y_test, y_pred_voting) # evaluating
	# Out of the three models,
	# only SVC requires some tweaking to output its confidence
	# this is done by setting probability=True:
	svm_clf_tweaked = SVC(gamma='scale', probability=True, random_state=42)

	soft_voting_clf = VotingClassifier(estimators=[('svm', svm_clf_tweaked), ('tree', tree_clf), ('log', log_clf)],
	voting='soft')

	soft_voting_clf.fit(X_train, y_train) # training
	y_pred_voting = soft_voting_clf.predict(X_test) # predicting
	from sklearn.model_selection import train_test_split

	X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
	# Bagging creates several models that rely on the same algorithm.
	# The training of each model uses a different subset of data sampled randomly from the training set.
	# By default Bagging uses soft voting when its base estimator can provide its measure of confidence,
	# Hence the SVC model is set to have probability=True
	from sklearn.svm import SVC
	from sklearn.ensemble import BaggingClassifier

	bagging_clf = BaggingClassifier(SVC(gamma='scale', probability=True, random_state=42),
	bootstrap=True, # set to False to use Pasting instead of Bagging
	n_estimators=100, # number of SVC models to create
	from sklearn.linear_model import SGDRegressor
	sgd_reg = SGDRegressor(max_iter=100, tol=None, penalty=None, eta0=0.01, learning_rate='constant')
	sgd_reg.fit(dx_train, dy_train)
	print(f"Optimized Parameters: \nBias: {round(sgd_reg.intercept_[0], 2)}, Weights: {sgd_reg.coef_}")
	'''
	Out:
	Optimized Parameters:
	Bias: 0.0, Weights: [1. 0.5]
	'''
	from sklearn.preprocessing import MinMaxScaler
	norm_scaler = MinMaxScaler()
	dx_train_normalized_sklearn = norm_scaler.fit_transform(dx_train)

	# Printing the first 5 rows
	print(f"Training data normalized using Scikit-Learn: \n{dx_train_normalized_sklearn[:5]}")

	'''
	Out:
	Training data normalized using Scikit-Learn:
	from sklearn.preprocessing import StandardScaler

	std_scaler = StandardScaler()
	dx_train_standardized_sklearn = std_scaler.fit_transform(dx_train)

	# Printing the first 5 rows
	print(f"Training data standardized using Scikit-Learn: \n{dx_train_standardized_sklearn[:5]}")

	'''
	Out:
	import plotly.graph_objects as go

	fig = go.Figure()
	fig.add_trace(go.Scatter3d(x = params[:, 0], y = params[:, 1], z = params[:, 2],
	mode='lines',
	name='No Scaling',
	line=dict(color='green', width=2)))
	fig.add_trace(go.Scatter3d(x = params_std[:, 0], y = params_std[:, 1], z = params_std[:, 2],
	mode='lines',
	name='Standardization',
	import numpy as np
	bias = np.random.randn(1, 1)
	weights = np.random.randn(input_length, 1) # for some integer input_length