Skip to content

Instantly share code, notes, and snippets.

@sridatta
Created September 8, 2020 03:04
Show Gist options
  • Save sridatta/cc33e3b4bfc6a4840450001554af33a3 to your computer and use it in GitHub Desktop.
Save sridatta/cc33e3b4bfc6a4840450001554af33a3 to your computer and use it in GitHub Desktop.
import argparse
from sklearn.model_selection import validation_curve, GridSearchCV, learning_curve, train_test_split
from sklearn.metrics import make_scorer, accuracy_score, f1_score, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
from sklearn import tree
import numpy as np
from collections import namedtuple
import matplotlib.pyplot as plt
from yellowbrick.datasets import load_occupancy
from yellowbrick.model_selection import ValidationCurve
from yellowbrick.classifier import ClassificationReport
import sklearn
#%% Load data
# Single seed shared by every stochastic step below so that a re-run produces
# the same dataset, the same split, and the same fitted trees.
RANDOM_SEED = 123

# Synthetic binary classification problem: 4400 samples with 500 features, of
# which only 5 are informative and 15 are linear combinations of those.
X, y = sklearn.datasets.make_classification(
    n_samples=4400,
    n_features=500,
    n_informative=5,
    n_redundant=15,
    random_state=RANDOM_SEED,
)
# Hold out 20% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_SEED
)

#%% Base model
# Seed the tree as well: it is randomized internally, so without a seed
# repeated fits on identical data may differ.
clf = tree.DecisionTreeClassifier(random_state=RANDOM_SEED)
model = Pipeline([('Scale', StandardScaler()), ("DecisionTree", clf)])

# Keys use the "<step name>__<parameter>" form so the search sets them on the
# "DecisionTree" step of the pipeline.
# 49 depths x 8 split sizes = 392 candidate settings.
params_grid = {
    "DecisionTree__max_depth": np.arange(1, 50, 1),
    "DecisionTree__min_samples_split": np.arange(2, 10, 1),
}
scorer = "accuracy"

#%% Grid search
# Exhaustive search with 5-fold cross-validation (392 x 5 = 1960 fits),
# run on all available cores (n_jobs=-1) with per-fit progress output.
cv = GridSearchCV(model, params_grid, scoring=scorer, cv=5, verbose=5, n_jobs=-1)
cv.fit(x_train, y_train)
print("**** Grid Search ****")
print(f"Best parameters: {cv.best_params_}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment