Skip to content

Instantly share code, notes, and snippets.

@rohithteja
Last active April 28, 2022 21:49
Show Gist options
Save rohithteja/65b851955c44717fcdfe905e38162a0d to your computer and use it in GitHub Desktop.
Sample code with dimensionality reduction and binary classification
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Experiment settings: which reducer, which classifier, how many components.
dim_red_type = 'pca'
classifier = 'svc'
n_comp = 10

# Synthetic binary-classification data set: 1000 samples with 30 features,
# of which 15 are informative and 15 are redundant. The seed is fixed so the
# generated data is the same on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=30,
    n_informative=15,
    n_redundant=15,
    random_state=42,
)

# Hold out 30% of the samples for testing; stratify on the labels so both
# parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)
# dimensionality reduction
def dim_reduction(X_train, X_test, dim_red_type, n_comp):
    """Reduce train and test features to ``n_comp`` dimensions.

    The reducer is fitted on ``X_train`` only and then used to transform
    both ``X_train`` and ``X_test``.

    Args:
        X_train: Training feature matrix used to fit the reducer.
        X_test: Test feature matrix, transformed with the fitted reducer.
        dim_red_type: ``'pca'`` (PCA) or ``'lle'`` (LocallyLinearEmbedding).
        n_comp: Number of components to keep.

    Returns:
        A tuple ``(X_train_dim, X_test_dim)`` with the transformed sets.

    Raises:
        ValueError: If ``dim_red_type`` is not one of the supported names.
    """
    if dim_red_type == 'pca':
        dim_red = PCA(n_components=n_comp)
    elif dim_red_type == 'lle':
        dim_red = LocallyLinearEmbedding(n_components=n_comp)
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # on the first use of ``dim_red`` below.
        raise ValueError(
            f"Unknown dim_red_type {dim_red_type!r}; expected 'pca' or 'lle'"
        )

    # Fit and transform are kept as separate steps so the training set goes
    # through the same ``transform`` path as the test set.
    dim_red.fit(X_train)
    X_train_dim = dim_red.transform(X_train)
    X_test_dim = dim_red.transform(X_test)
    return X_train_dim, X_test_dim
# model training and eval
def train(classifier, X_train, y_train, X_test, y_test):
    """Fit the chosen classifier and return its test accuracy in percent.

    Args:
        classifier: ``'lr'`` (LogisticRegression), ``'svc'`` (SVC) or
            ``'rf'`` (RandomForestClassifier).
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        Accuracy on the test set as a percentage rounded to one decimal
        place (e.g. ``85.4``).

    Raises:
        ValueError: If ``classifier`` is not one of the supported names.
    """
    if classifier == 'lr':
        clf = LogisticRegression()
    elif classifier == 'svc':
        clf = SVC()
    elif classifier == 'rf':
        clf = RandomForestClassifier()
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # on the first use of ``clf`` below.
        raise ValueError(
            f"Unknown classifier {classifier!r}; expected 'lr', 'svc' or 'rf'"
        )

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Use the built-in round() rather than the ``.round`` method: the method
    # only exists on NumPy scalars, while accuracy_score may return a plain
    # Python float depending on the scikit-learn version.
    acc_score = round(float(accuracy_score(y_test, y_pred)), 3)
    # Re-round after scaling so the percentage carries no float noise
    # (e.g. 85.39999999999999 instead of 85.4).
    return round(acc_score * 100, 1)
# Run the pipeline with the settings chosen in the "arguments" section above
# (previously the calls hard-coded 'lle', 2 and 'lr', so those settings were
# silently ignored) and report the score, which was otherwise discarded when
# the file is run as a script.
X_train, X_test = dim_reduction(X_train, X_test, dim_red_type, n_comp)
accuracy = train(classifier, X_train, y_train, X_test, y_test)
print(f"{dim_red_type} + {classifier} test accuracy: {accuracy}%")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment