Skip to content

Instantly share code, notes, and snippets.

View lulutan98's full-sized avatar

Lulu Tan lulutan98

View GitHub Profile
from imblearn.over_sampling import SMOTE

# Oversample X / y with SMOTE using its default settings.
# NOTE: `fit_resample` is the current name of this method; the older
# `fit_sample` alias is no longer available in recent imbalanced-learn
# releases and raises AttributeError there.
smote = SMOTE()
X_os, y_os = smote.fit_resample(X, y)

# plot the balanced dataset
plot_data(X_os, y_os, title="Balanced Dataset - 2 Component PCA")

# Tabular copy of the resampled features, with the resampled labels stored
# in an extra 'target' column.
df1 = pd.DataFrame(X_os)
df1['target'] = y_os
def plot_data(X,y,title="Imbalanced Dataset - 2 Component PCA"):
fig = plt.figure(figsize = (6,6))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('PCA 1', fontsize = 12)
ax.set_ylabel('PCA 2', fontsize = 12)
ax.set_title(title, fontsize = 16)
colors = ['g', 'm']
classes = ['0','1']
for i,c in zip(np.unique(y), colors):
ax.scatter(
from sklearn.datasets import make_classification
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Generate a 100-sample, two-class toy dataset whose class weights are
# 0.9 / 0.1, i.e. deliberately imbalanced.
X, y = make_classification(n_samples=100, n_classes=2, weights=[0.9, 0.1])

# Feature table with the class labels appended as a 'target' column.
df = pd.DataFrame(X).assign(target=y)