Created
January 27, 2020 15:17
-
-
Save gracecarrillo/84218a4403b2165fa6d44ba7736598fd to your computer and use it in GitHub Desktop.
K-fold cross-validation using Exhaustive Grid Search. Cross-validation using Scikit Learn's Grid Search.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#----------- CROSS-VALIDATION WITH GRID SEARCH ------------------------#
# Naive Bayes Classifier
#
# Tunes a TF-IDF + Multinomial Naive Bayes pipeline with an exhaustive grid
# search, reports how long the search took, and scores the refit best model
# on the held-out test split.

# combine features
features_tfidf = features_union(tfidf)

# instantiate pipeline object
nb_pipeline = Pipeline([('feats', features_tfidf), ('clf', MultinomialNB())])

# parameter grid: 3 x 3 x 2 x 2 x 3 x 3 x 2 = 648 combinations
# NOTE(review): the 'feats__text__tfidf__*' paths assume features_union()
# exposes a 'text' branch containing a step named 'tfidf' -- confirm against
# its definition.
parameters = {
    'feats__text__tfidf__max_df': (0.5, 0.75, 1.0),
    'feats__text__tfidf__ngram_range': ((1, 1), (1, 2), (2, 2)),
    'feats__text__tfidf__use_idf': (False, True),
    'feats__text__tfidf__binary': (False, True),
    # This key was previously a second 'binary' entry, which silently
    # overwrote the one above; the values are normalisation options.
    'feats__text__tfidf__norm': ('l1', 'l2', None),
    'clf__alpha': (1.0, 5.0, 10.0),
    'clf__fit_prior': (True, False),
}

# instantiate GridSearchCV object with pipeline and parameters with 3-folds cross-validation
# (648 combinations x 3 folds = 1944 fits)
nb_grid = GridSearchCV(nb_pipeline, parameters, cv=3)  # this takes a while :/

# start time
nb_start = time.time()

# Fit
nb_grid.fit(X_train, y_train)

# end time (was stored as `svm_end`, which made the print below raise NameError)
nb_end = time.time()
print(f"Time taken to run: {round((nb_end - nb_start)/60,1)} minutes")

# Check score
nb_grid.score(X_test, y_test)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment