Skip to content

Instantly share code, notes, and snippets.

@gopal151295
gopal151295 / predict label.py
Last active Apr 1, 2019
AutoTagging - predict label
View predict label.py
# Build the feature matrix for a single (title, body) pair using the
# already-fitted vectorizers.
norm_title = normalize_text(title)
norm_body = normalize_text(body)

# Fix: the title series was previously built from norm_body, so norm_title
# was never used and both vectorizers received the body text.
# NOTE(review): assumes vectorizer_X1 was fitted on titles and vectorizer_X2
# on bodies -- confirm against the training code.
x1_ = pd.Series([norm_title])
x2_ = pd.Series([norm_body])

x1_tfidf = vectorizer_X1.transform(x1_)
x2_tfidf = vectorizer_X2.transform(x2_)

# Concatenate the two blocks column-wise, X1 columns first, then X2 columns.
x_tfidf = hstack([x1_tfidf, x2_tfidf])
@gopal151295
gopal151295 / load pre trained model.py
Last active Apr 1, 2019
AutoTagging - load pre trained
View load pre trained model.py
# Restore the two fitted vectorizers and the trained classifier from disk.
#
# Each file is opened in a `with` block so its handle is closed even if
# unpickling raises; previously only the first handle was closed here.
# The handle names stay bound (to closed files) after each block, so a later
# `.close()` call on them is a harmless no-op.
#
# NOTE(security): pickle.load can execute arbitrary code from the file it
# reads. Only load pickle files that come from a trusted source.
with open('vectorX1.pickle', 'rb') as vectorX1_pkl:
    vectorizer_X1 = pickle.load(vectorX1_pkl)

with open('vectorX2.pickle', 'rb') as vectorX2_pkl:
    vectorizer_X2 = pickle.load(vectorX2_pkl)

with open('mlc.pickle', 'rb') as mlc_pkl:
    best_model = pickle.load(mlc_pkl)
@gopal151295
gopal151295 / pickle the model.py
Created Apr 1, 2019
AutoTagging - pickle the trained model
View pickle the model.py
import pickle

# Persist the two fitted vectorizers and the trained classifier.
#
# Fix: the files were opened in 'ab' (append) mode. Re-running the script
# appended a second pickle after the first, and a single pickle.load() on
# such a file returns only the first (stale) object. 'wb' truncates the file
# so it always holds exactly the latest object.
#
# The `with` blocks flush and close every handle, also when dump() raises.
# pickle.dump(obj, file): the object to serialize comes first, the
# destination file second.
with open('vectorX1.pickle', 'wb') as vectorX1_pkl:
    pickle.dump(vectorizer_X1, vectorX1_pkl)

with open('vectorX2.pickle', 'wb') as vectorX2_pkl:
    pickle.dump(vectorizer_X2, vectorX2_pkl)

with open('mlc.pickle', 'wb') as mlc_pkl:
    pickle.dump(best_model, mlc_pkl)
@gopal151295
gopal151295 / printTop10 function.py
Created Apr 1, 2019
AutoTagging - printTop10 func
View printTop10 function.py
def print_top10(feature_names, clf, class_labels):
    """Print the features with the highest coefficient values, per class.

    Args:
        feature_names: Sequence indexable by column position; entry ``j`` is
            the name of feature column ``j``.
        clf: Fitted classifier exposing ``coef_``, indexed here by class
            position (``clf.coef_[i]`` is the coefficient row of class ``i``).
        class_labels: Iterable of class names, in the same order as the rows
            of ``clf.coef_``.

    For every class, up to ten feature names are printed on one line, ordered
    by ascending coefficient, so the strongest feature comes last. Each line
    is framed by a dashed separator above and below.
    """
    separator = "--------------------------------------------"
    for i, class_label in enumerate(class_labels):
        # argsort is ascending, so the last ten indices are the largest
        # coefficients of this class.
        top10 = np.argsort(clf.coef_[i])[-10:]
        top_names = " ".join(feature_names[j] for j in top10)
        print(separator)
        print("%s: %s" % (class_label, top_names))
        print(separator)
@gopal151295
gopal151295 / top10 features for every class.txt
Created Apr 1, 2019
AutoTagging - top10 features for every class
View top10 features for every class.txt
--------------------------------------------
.net: layout sender assembly microsoft trace scenario manage consolewriteline net net
--------------------------------------------
--------------------------------------------
ajax: render load 200 console people dynamic except implementation ajax ajax
--------------------------------------------
--------------------------------------------
algorithm: binary 1024 tree software fast problem give additional algorithm algorithm
--------------------------------------------
--------------------------------------------
@gopal151295
gopal151295 / confusion matrix output.txt
Created Apr 1, 2019
AutoTagging - confusion matrix output
View confusion matrix output.txt
.net
[[15346 43]
[ 577 113]]
ajax
[[15940 16]
[ 71 52]]
algorithm
[[15827 37]
@gopal151295
gopal151295 / top 10 features.py
Created Mar 29, 2019
AutoTagging - top 10 features
View top 10 features.py
# Collect the feature names of both vectorizers into one flat list, X1 names
# first and X2 names second.
# NOTE(review): this order must match the column order of the matrix the
# model was trained on -- confirm against the training code.
#
# Fix: get_feature_names() was removed in scikit-learn 1.2. Prefer
# get_feature_names_out() when the vectorizer has it and fall back to the old
# method otherwise. Building a plain list keeps the result indexable by
# position whichever method is used (the new one returns an ndarray, and "+"
# on two ndarrays would not concatenate them).
feature_names = [
    name
    for vectorizer in (vectorizer_X1, vectorizer_X2)
    for name in (
        vectorizer.get_feature_names_out()
        if hasattr(vectorizer, "get_feature_names_out")
        else vectorizer.get_feature_names()
    )
]
print_top10(feature_names, best_model, multilabel_binarizer.classes_)
@gopal151295
gopal151295 / confusion matrix.py
Created Mar 29, 2019
AutoTagging - confusion matrix
View confusion matrix.py
# Print one confusion matrix per label column, comparing the true test
# labels with the predictions for that column.
#
# Fix: the loop bound is taken from y_test, the array that is actually
# indexed, instead of y_train. The two agree only when both have the same
# number of label columns.
for i in range(y_test.shape[1]):
    print(multilabel_binarizer.classes_[i])
    print(confusion_matrix(y_test[:, i], y_pred[:, i]))
    print("")
@gopal151295
gopal151295 / best model prediction.py
Created Mar 29, 2019
AutoTagging - best model prediction
View best model prediction.py
# Take the estimator that the grid search selected as best.
best_model = CV_svc.best_estimator_
# Predict labels for X_test with that estimator.
y_pred = best_model.predict(X_test)
# print_score is defined elsewhere; presumably it reports evaluation metrics
# for these predictions -- confirm against its definition.
print_score(y_pred, best_model)
@gopal151295
gopal151295 / GridSearch CV on the best classifier.py
Created Mar 29, 2019
AutoTagging - GridSearch CV on the best classifier
View GridSearch CV on the best classifier.py
# Grid search over the regularization strength of a one-vs-rest linear SVM.
svc = OneVsRestClassifier(LinearSVC())

# The "estimator__" prefix addresses the C parameter of the LinearSVC that
# OneVsRestClassifier wraps.
param_grid = {'estimator__C': [1, 10, 100, 1000]}

# 5-fold cross-validation, scored with avg_jacard (higher is better).
CV_svc = model_selection.GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    cv=5,
    verbose=10,
    scoring=make_scorer(avg_jacard, greater_is_better=True),
)
CV_svc.fit(X_train, y_train)
You can’t perform that action at this time.