Dario Radečić (daradecic)

  • NEOS
  • Zagreb
@daradecic
daradecic / pr_curves.py
Created January 4, 2021 07:51
003_pr_curves
# Bar chart of class counts for the binarized wine-quality target
ax = df['quality'].value_counts().plot(kind='bar', figsize=(10, 6), fontsize=13, color='#087E8B')
ax.set_title('Counts of Bad and Good wines', size=20, pad=30)
ax.set_ylabel('Count', fontsize=14)
# Annotate each bar with its count
for i in ax.patches:
    ax.text(i.get_x() + 0.19, i.get_height() + 100, str(round(i.get_height(), 2)), fontsize=15)
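The pr_curves snippets shown here start at 003, so the step that builds df is not included. A minimal sketch of the assumed setup follows; the file name, the quality threshold, and the imports are assumptions, not taken from the gists.
import pandas as pd
import matplotlib.pyplot as plt

# Hypothetical loading step: a wine-quality CSV with a numeric 'quality' column,
# binarized into 'Good'/'Bad' labels (path and threshold are assumptions)
df = pd.read_csv('winequality.csv')
df['quality'] = df['quality'].apply(lambda q: 'Good' if q >= 6 else 'Bad')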
@daradecic
daradecic / pr_curves.py
Created January 4, 2021 07:52
004_pr_curves
from sklearn.model_selection import train_test_split
X = df.drop('quality', axis=1)
y = df['quality']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
@daradecic
daradecic / pr_curves.py
Created January 4, 2021 07:52
005_pr_curves
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Fit the models and keep the predicted probability of the positive class
model_lr = LogisticRegression().fit(X_train, y_train)
probs_lr = model_lr.predict_proba(X_test)[:, 1]
model_dt = DecisionTreeClassifier().fit(X_train, y_train)
probs_dt = model_dt.predict_proba(X_test)[:, 1]
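The gist preview appears to cut off here. The plotting snippet in 007_pr_curves also uses curves for a random forest and an XGBoost model, so the remainder of this gist presumably continues along these lines (the variable names probs_rf and probs_xg are inferred, not shown in the preview):
model_rf = RandomForestClassifier().fit(X_train, y_train)
probs_rf = model_rf.predict_proba(X_test)[:, 1]
# Note: newer xgboost versions expect integer labels, so y may need encoding first
model_xg = XGBClassifier().fit(X_train, y_train)
probs_xg = model_xg.predict_proba(X_test)[:, 1]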
@daradecic
daradecic / pr_curves.py
Created January 4, 2021 07:53
006_pr_curves
from sklearn.metrics import auc, precision_recall_curve

# Map the string labels to integers for the metric functions
y_test_int = y_test.replace({'Good': 1, 'Bad': 0})

# The PR-curve baseline (a no-skill classifier) sits at the positive-class prevalence
baseline_model = sum(y_test_int == 1) / len(y_test_int)

precision_lr, recall_lr, _ = precision_recall_curve(y_test_int, probs_lr)
auc_lr = auc(recall_lr, precision_lr)
precision_dt, recall_dt, _ = precision_recall_curve(y_test_int, probs_dt)
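007_pr_curves below also references auc_dt and the precision/recall/AUC values for the random forest and XGBoost models, which the truncated preview omits. Presumably the gist continues in the same pattern:
auc_dt = auc(recall_dt, precision_dt)
precision_rf, recall_rf, _ = precision_recall_curve(y_test_int, probs_rf)
auc_rf = auc(recall_rf, precision_rf)
precision_xg, recall_xg, _ = precision_recall_curve(y_test_int, probs_xg)
auc_xg = auc(recall_xg, precision_xg)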
@daradecic
daradecic / pr_curves.py
Created January 4, 2021 07:53
007_pr_curves
# Plot all PR curves; the dashed horizontal baseline sits at the positive-class prevalence
plt.figure(figsize=(12, 7))
plt.plot([0, 1], [baseline_model, baseline_model], linestyle='--', label='Baseline model')
plt.plot(recall_lr, precision_lr, label=f'AUC (Logistic Regression) = {auc_lr:.2f}')
plt.plot(recall_dt, precision_dt, label=f'AUC (Decision Tree) = {auc_dt:.2f}')
plt.plot(recall_rf, precision_rf, label=f'AUC (Random Forests) = {auc_rf:.2f}')
plt.plot(recall_xg, precision_xg, label=f'AUC (XGBoost) = {auc_xg:.2f}')
plt.title('Precision-Recall Curve', size=20)
plt.xlabel('Recall', size=14)
plt.ylabel('Precision', size=14)
plt.legend();
@daradecic
daradecic / synthetic_datasets.py
Created January 11, 2021 09:18
001_synthetic_datasets
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from matplotlib import rcParams
# Hide the top and right plot spines
rcParams['axes.spines.top'] = False
rcParams['axes.spines.right'] = False
@daradecic
daradecic / synthetic_datasets.py
Created January 11, 2021 09:19
002_synthetic_datasets
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42
)
df = pd.concat([pd.DataFrame(X), pd.Series(y)], axis=1)
df.columns = ['x1', 'x2', 'y']
@daradecic
daradecic / synthetic_datasets.py
Created January 11, 2021 09:19
003_synthetic_datasets
def plot(df: pd.DataFrame, x1: str, x2: str, y: str, title: str = '', save: bool = False, figname='figure.png'):
    plt.figure(figsize=(14, 7))
    plt.scatter(x=df[df[y] == 0][x1], y=df[df[y] == 0][x2], label='y = 0')
    plt.scatter(x=df[df[y] == 1][x1], y=df[df[y] == 1][x2], label='y = 1')
    plt.title(title, fontsize=20)
    plt.legend()
    if save:
        plt.savefig(figname, dpi=300, bbox_inches='tight', pad_inches=0)
    plt.show()
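For reference, calling the helper on the DataFrame built in 002_synthetic_datasets would look something like this (the title string is illustrative):
plot(df, x1='x1', x2='x2', y='y', title='Synthetic binary classification dataset')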
@daradecic
daradecic / synthetic_datasets.py
Created January 11, 2021 09:20
004_synthetic_datasets
# flip_y=0.15 assigns a random class to roughly 15% of samples (label noise)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    flip_y=0.15,
    random_state=42
)
df = pd.concat([pd.DataFrame(X), pd.Series(y)], axis=1)
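The preview truncates this gist before the column-renaming line; presumably it mirrors 002_synthetic_datasets:
df.columns = ['x1', 'x2', 'y']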
@daradecic
daradecic / synthetic_datasets.py
Created January 11, 2021 09:21
005_synthetic_datasets
# weights=[0.95] assigns roughly 95% of samples to class 0, producing an imbalanced dataset
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.95],
    random_state=42
)
df = pd.concat([pd.DataFrame(X), pd.Series(y)], axis=1)
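As with the previous snippet, the preview ends before the column renaming; the remainder presumably matches 002_synthetic_datasets:
df.columns = ['x1', 'x2', 'y']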