Last active
January 1, 2020 09:13
-
-
Save mirontoli/37bbb4a1cd3a0d86cea1ff0c798ac985 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make predictions: fit an SVM on the iris data and score it on a hold-out set.
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Load dataset; the column labels are supplied here via names=.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = read_csv(url, names=names)

# Split-out validation dataset.
# Pick the columns by name rather than slicing dataset.values: the full frame
# mixes numeric measurements with the string 'class' column, so slicing it
# would hand the model an object-dtype feature array.
X = dataset[names[:4]].values
y = dataset['class'].values
# Hold back 20% of the rows; random_state pins the shuffle so runs repeat.
X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1)

# Make predictions on validation dataset
model = SVC(gamma='auto')
model.fit(X_train, Y_train)
predictions = model.predict(X_validation)

# Evaluate predictions: overall accuracy, per-class confusion counts, then
# the precision/recall/F1 report.
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hello World of Machine Learning
# https://machinelearningmastery.com/machine-learning-in-python-step-by-step/
# python3 and pip3 were used
# Install the dependencies from a terminal:
# pip install --user scipy numpy matplotlib pandas scikit-learn
# Use VS Code to run it line by line
# Check the versions of libraries
import sys

import scipy
import numpy
import matplotlib
import pandas
import sklearn

# Python version
print(f'Python: {sys.version}')

# scipy, numpy, matplotlib, pandas, scikit-learn: one "<module>: <version>"
# line each, printed in that order.
for library in (scipy, numpy, matplotlib, pandas, sklearn):
    print(f'{library.__name__}: {library.__version__}')
# Load libraries
from matplotlib import pyplot
from pandas import read_csv
from pandas.plotting import scatter_matrix
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Load dataset; the column labels are supplied here via names=.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = read_csv(url, names=names)

# Statistics: dimensions, first 20 rows, numeric summary, rows per class.
print(dataset.shape)
print(dataset.head(20))
print(dataset.describe())
print(dataset.groupby('class').size())

# Visualization
# box plots, one subplot per column on a 2x2 grid with independent axes
dataset.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
pyplot.show()
# histograms
dataset.hist()
pyplot.show()
# scatter plot matrix
scatter_matrix(dataset)
pyplot.show()

# Split-out validation dataset.
# Pick the columns by name rather than slicing dataset.values: the full frame
# mixes numeric measurements with the string 'class' column, so slicing it
# would hand the models an object-dtype feature array.
X = dataset[names[:4]].values
y = dataset['class'].values
# Hold back 20% of the rows; random_state pins the shuffle so runs repeat.
X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1)
# Spot Check Algorithms: (short label, unfitted estimator) pairs.
models = [
    # One-vs-rest logistic regression. The explicit wrapper replaces the
    # deprecated multi_class='ovr' argument of LogisticRegression.
    ('LR', OneVsRestClassifier(LogisticRegression(solver='liblinear'))),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC(gamma='auto')),
]

# Test options and evaluation metric: stratified 10-fold cross-validation
# scored on accuracy. The splitter is built once; its integer random_state
# should make every split() call produce the same folds, so each model is
# evaluated on identical partitions.
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)

# evaluate each model in turn
results = []
# Kept separate from the column list `names` so that list is not overwritten.
model_names = []
for name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    model_names.append(name)
    # mean accuracy across the folds, with the standard deviation in brackets
    print(f'{name}: {cv_results.mean()} ({cv_results.std()})')

# Compare Algorithms: one box per model over its per-fold accuracies.
pyplot.boxplot(results)
# Boxes sit at x = 1..N by default; label them through xticks rather than the
# boxplot `labels` keyword, which newer matplotlib releases deprecate.
pyplot.xticks(list(range(1, len(model_names) + 1)), model_names)
pyplot.title('Algorithm Comparison')
pyplot.show()
# Make predictions on validation dataset: refit the SVM on the whole training
# split, then predict the held-out rows.
model = SVC(gamma='auto')
model.fit(X_train, Y_train)
predictions = model.predict(X_validation)

# Evaluate predictions: overall accuracy, per-class confusion counts, then
# the precision/recall/F1 report, printed in that order.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_validation, predictions))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment