# 1- Generating a dataset.
from sklearn.datasets import make_moons
# X holds the generated instances, an array of shape (500, 2).
# y holds the labels of X, with values of either 0 or 1.
X, y = make_moons(n_samples=500, noise=0.3, random_state=42)
# 2- Visualizing the dataset.
from matplotlib import pyplot as plt
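# A minimal plotting sketch (assumed; the original visualization code is not shown):
# color each point by its class label to show the two interleaving moons.
plt.scatter(X[y == 0, 0], X[y == 0, 1], label="class 0")
plt.scatter(X[y == 1, 0], X[y == 1, 1], label="class 1")
plt.legend()
plt.show()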
# Creating three models with three different algorithms
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
tree_clf = DecisionTreeClassifier(random_state=42)
log_clf = LogisticRegression(solver="lbfgs", random_state=42)
svm_clf = SVC(gamma="scale", random_state=42)
# Training, predicting, then evaluating the predictions
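# A minimal sketch of this step (assumed; the original training/evaluation code is not shown):
# fit each model on the training set and measure its accuracy on the test set.
from sklearn.metrics import accuracy_score
for clf in (tree_clf, log_clf, svm_clf):
    clf.fit(X_train, y_train)        # training
    y_pred = clf.predict(X_test)     # predicting
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))  # evaluating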
# Combining the three models into an ensemble
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
# The ensemble is a voting classifier that aggregates our three models
voting_clf = VotingClassifier(estimators=[('svm', svm_clf), ('tree', tree_clf), ('log', log_clf)],
                              voting='hard')
voting_clf.fit(X_train, y_train)  # training
y_pred_voting = voting_clf.predict(X_test)  # predicting
accuracy_score(y_test, y_pred_voting)  # evaluating
# Out of the three models,
# only SVC requires some tweaking to output its confidence;
# this is done by setting probability=True:
svm_clf_tweaked = SVC(gamma='scale', probability=True, random_state=42)
soft_voting_clf = VotingClassifier(estimators=[('svm', svm_clf_tweaked), ('tree', tree_clf), ('log', log_clf)],
                                   voting='soft')
soft_voting_clf.fit(X_train, y_train)  # training
y_pred_voting = soft_voting_clf.predict(X_test)  # predicting
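# Evaluating the soft-voting ensemble the same way as the hard-voting one
# (assumed; the original snippet stops at the prediction step):
accuracy_score(y_test, y_pred_voting)  # evaluating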
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# Bagging creates several models that rely on the same algorithm.
# The training of each model uses a different subset of data, sampled randomly from the training set.
# By default, bagging uses soft voting when its base estimator can provide a measure of confidence;
# hence the SVC model is set to have probability=True.
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
bagging_clf = BaggingClassifier(SVC(gamma='scale', probability=True, random_state=42),
                                bootstrap=True,     # set to False to use pasting instead of bagging
                                n_estimators=100)   # number of SVC models to create
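# Fitting and evaluating the bagged ensemble (assumed; the original snippet is
# truncated after the constructor arguments shown above):
bagging_clf.fit(X_train, y_train)  # training
y_pred_bagging = bagging_clf.predict(X_test)  # predicting
accuracy_score(y_test, y_pred_bagging)  # evaluating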
import numpy as np
# Parameters of a single neuron: one bias and one weight per input feature.
bias = np.random.randn(1, 1)
weights = np.random.randn(input_length, 1)  # for some integer input_length
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Forward pass for one instance (a vector of length input_length):
# weights has shape (input_length, 1), so transpose it before the dot product.
z = np.dot(weights.T, instance) + bias.squeeze()
a = sigmoid(z)
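# A small self-contained usage sketch (assumed, not part of the original):
# forward pass of a single neuron over one instance with two features.
input_length = 2                         # hypothetical number of input features
weights = np.random.randn(input_length, 1)
bias = np.random.randn(1, 1)
instance = np.array([0.5, -1.0])         # hypothetical input vector
print(sigmoid(np.dot(weights.T, instance) + bias.squeeze()))  # activation in (0, 1)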
# Derivative of the quadratic cost 1/2 * (activation - y)^2 with respect to the activation.
def cost_derivative(activation, y):
    return (activation - y)
# A toy dataset: 100 instances with two integer features in [-10, 10],
# and a real-valued target that is a linear combination of the features.
import numpy as np
np.random.seed(42)
dx = np.random.randint(low=-10, high=11, size=(100, 2)).astype(float)
dy = (dx[:, 0] + dx[:, 1] * 0.5)
dx_train, dx_test, dy_train, dy_test = dx[:80], dx[80:], dy[:80], dy[80:]
training_data = list(zip(dx_train, dy_train))
testing_data = list(zip(dx_test, dy_test))
print(f"The first 5 rows of the dataset:\n{np.concatenate((dx, dy.reshape(-1,1)), axis=1)[:5]}")