Skip to content

Instantly share code, notes, and snippets.

# Builds a hyper-parameter search object around `clf`; the call is wrapped by
# the `timeit` decorator.
# `grid_or_random` picks the search strategy: "Grid" creates a GridSearchCV
# over `parameters`, scored with `scorer`.
# NOTE(review): this definition is truncated in this file -- the body of the
# "Random" branch and everything after it is missing, so what happens with
# `X` and `y` and what is returned cannot be told from here. Presumably the
# "Random" branch builds a RandomizedSearchCV (it is imported by this file)
# -- TODO confirm against the original source.
@timeit
def generate_clf_from_search(grid_or_random, clf, parameters, scorer, X, y):
    if grid_or_random == "Grid":
        # Search driven by the supplied parameter grid and scorer.
        search_obj = GridSearchCV(clf, parameters, scoring=scorer)
    elif grid_or_random == "Random":
# Candidate hyper-parameter values, keyed by estimator parameter name
# (presumably for the decision tree classifier used in this file).
parameters = {
    'max_depth': list(range(1, 6)),
    'min_samples_leaf': list(range(1, 6)),
    'min_samples_split': list(range(2, 6)),
    'criterion': ['gini', 'entropy'],
}
# Turn the F1 metric into a scorer object usable as a `scoring` argument.
scorer = make_scorer(f1_score)
# End-to-end training script: load the data, split it, fit a decision tree and
# report F1 scores. The statements are ordered so that every name is defined
# before it is used (data -> features/target -> split -> model -> metrics).
from sklearn.tree import DecisionTreeClassifier as dt

# Load the dataset from disk.
file_loc = 'loan_prediction.csv'
df = pd.read_csv(file_loc)
print(df.head())

# Every column except the last is treated as a feature; the last column is
# treated as the target.
train_cols = df.columns[:-1]
target_cols = df.columns[-1]
X = df.iloc[:, :-1].values
Y = df.iloc[:, -1].values

# Hold out 25% of the rows for testing; fixed seed for reproducible splits.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0
)

# Decision tree with default hyper-parameters.
clf = dt()

# Fit the model
clf.fit(X_train, Y_train)

# Make predictions
train_predictions = clf.predict(X_train)
test_predictions = clf.predict(X_test)

# f1_score takes the ground truth first and the predictions second.
print('The Training F1 Score is', f1_score(Y_train, train_predictions))
print('The Testing F1 Score is', f1_score(Y_test, test_predictions))

# 5-fold cross-validated macro F1 on the training split; print the mean so
# the result is not silently discarded when run as a script.
scores = cross_val_score(clf, X_train, Y_train, cv=5, scoring='f1_macro')
print(scores.mean())
def timeit(method):
    """Decorate *method* so that each call reports its wall-clock duration.

    The wrapper forwards all positional and keyword arguments to *method*
    unchanged (including ``log_time`` / ``log_name`` when given, so *method*
    must accept them) and returns whatever *method* returns.

    Reporting:
      * If the call carries a ``log_time`` keyword argument, the elapsed time
        in whole milliseconds is stored in that mapping under the key given
        by the ``log_name`` keyword argument, defaulting to the upper-cased
        name of *method*.
      * Otherwise the method name and elapsed milliseconds are printed.
    """
    from functools import wraps

    @wraps(method)  # keep the wrapped function's name and docstring
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int((te - ts) * 1000)
        else:
            print('%r %2.2f ms' % (method.__name__, (te - ts) * 1000))
        return result

    return timed
import numpy as np
import pandas as pd
import time
import warnings
from sklearn import metrics, preprocessing, tree
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split