Author
Gabriel Ziegler
# META CODE
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from xgboost import XGBClassifier

# One-vs-rest wrapper around an XGBoost classifier (depth-4 trees, all cores).
clf = OneVsRestClassifier(XGBClassifier(n_jobs=-1, max_depth=4))

# If each sample's target is a collection of labels (e.g. [[x, y, z], ...]),
# encode it into multilabel format before training, for example with
# `mlb.fit_transform(y)`.
mlb = MultiLabelBinarizer()
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


def plot_confusion_matrix(cm, classes, normalized=True, cmap='bone'):
    """Draw a confusion matrix as an annotated heatmap on a new 5x5 figure.

    The numbers written inside the cells are always the raw values of
    ``cm``; only the cell colours change with ``normalized``.

    Args:
        cm: 2-D array-like of counts (presumably one row per true class,
            as produced by ``sklearn.metrics.confusion_matrix`` -- confirm
            against the caller).
        classes: Tick labels used for both the x and y axes.
        normalized: When true, colour each cell by its share of the row
            total instead of by the raw count.
        cmap: Colormap passed through to ``seaborn.heatmap``.
    """
    # Accept nested lists as well as ndarrays.
    cm = np.asarray(cm)
    plt.figure(figsize=[5, 5])
    norm_cm = cm
    if normalized:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A row with no samples would divide by zero (NaN cells plus a
        # RuntimeWarning); leave such rows at 0 instead.
        norm_cm = np.divide(
            cm,
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
    sns.heatmap(
        norm_cm,
        annot=cm,
        fmt='g',
        xticklabels=classes,
        yticklabels=classes,
        cmap=cmap,
    )
import xgboost as xgb
from sklearn.metrics import classification_report

# Expects `params`, `dtrain`, `dtest` and `y_test` to exist already (they are
# built in the parameter and train/test-split snippets of this file).
bst = xgb.train(params, dtrain)
# NOTE(review): with the 'multi:softmax' objective, predict() should return
# class labels rather than probabilities -- confirm against the XGBoost docs.
pred = bst.predict(dtest)
print(classification_report(y_test, pred))
# Booster parameters for a multi-class XGBoost model.
params = {
    'max_depth': 6,
    'objective': 'multi:softmax',
    # NOTE(review): presumably must equal the number of distinct target
    # labels (3 would fit the wine data loaded elsewhere in this file).
    'num_class': 3,
}
import xgboost as xgb

# Expects `X`, `y` and `train_test_split` to exist already (see the data
# loading snippet of this file).
# Hold out 30% of the rows for testing; stratify on y and fix the seed so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
dtrain = xgb.DMatrix(data=X_train, label=y_train)
# The test matrix carries no labels; y_test is kept separately.
dtest = xgb.DMatrix(data=X_test)
# Column names applied to X below.
# Expects `pd`, `X` and `y` to exist already (imported and loaded in the
# data loading snippet of this file).
features = [
    'alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium',
    'total_phenols', 'flavanoids', 'nonflavanoid_phenols',
    'proanthocyanins', 'color_intensity', 'hue',
    'od280/od315_of_diluted_wines', 'proline',
]
# Wrap X and y in DataFrames so the columns carry names.
X = pd.DataFrame(data=X, columns=features)
y = pd.DataFrame(data=y, columns=['classes'])
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# return_X_y=True requests the (data, target) pair, unpacked here as X and y.
X, y = load_wine(return_X_y=True)
Gabriel Ziegler