# beannguyen/setup_svm_model.py

## setup_svm_model.py
from sklearn.svm import SVC
from time import time
from sklearn.metrics import f1_score

def train_classifier(clf, X_train, y_train):
    """Fit ``clf`` on the training data and print how long the fit took.

    Args:
        clf: Estimator exposing a ``fit(X, y)`` method.
        X_train: Training features, passed unchanged to ``clf.fit``.
        y_train: Training targets, passed unchanged to ``clf.fit``.

    Returns:
        None. The return value of ``clf.fit`` is discarded; the elapsed
        wall-clock time is printed to stdout.
    """
    # Time only the fit call itself.
    began = time()
    clf.fit(X_train, y_train)
    elapsed = time() - began

    print("Trained model in {:.4f} seconds".format(elapsed))


def predict_labels(clf, features, target):
    """Predict with a fitted classifier and score the predictions.

    Args:
        clf: Estimator exposing a ``predict(X)`` method.
        features: Samples handed to ``clf.predict``.
        target: True labels the predictions are compared against.

    Returns:
        A ``(f1, accuracy)`` tuple: the micro-averaged F1 score computed
        over labels ``[0, 1]``, and the number of predictions equal to
        ``target`` divided by the number of predictions.
    """
    # Time only the predict call itself.
    began = time()
    y_pred = clf.predict(features)
    elapsed = time() - began

    print("Made predictions in {:.4f} seconds.".format(elapsed))

    micro_f1 = f1_score(target, y_pred, labels=[0, 1], average='micro')
    # NOTE(review): assumes ``target == y_pred`` compares element-wise
    # (e.g. NumPy/pandas inputs) so the sum counts matches -- confirm callers.
    hits = sum(target == y_pred)
    accuracy = hits / float(len(y_pred))
    return micro_f1, accuracy


def train_predict(clf, X_train, y_train, X_test, y_test,
                  filename='finalized_model.sav'):
    """Train a classifier, report its scores, and save the fitted model.

    Prints the classifier's class name and the training-set size, fits it
    with ``train_classifier``, prints the F1 and accuracy scores returned by
    ``predict_labels`` for the training and test sets, and finally dumps the
    classifier to ``filename`` with ``joblib.dump``.

    Args:
        clf: Classifier to fit and evaluate.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
        filename: Path the fitted classifier is written to. Defaults to
            ``'finalized_model.sav'``, the location that used to be
            hard-coded.

    Returns:
        None. Results are printed to stdout and the model is written to
        disk.
    """
    # ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and
    # removed in 0.23, so prefer the standalone package and fall back to the
    # vendored copy only on old installations. Importing before training
    # means a missing dependency fails fast instead of after the fit.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib

    # Indicate the classifier and the training set size
    print("Training a {} using a training set size of {}. . .".format(
        clf.__class__.__name__, len(X_train)))

    # Train the classifier
    train_classifier(clf, X_train, y_train)

    # Print the results of prediction for both training and testing
    f1, acc = predict_labels(clf, X_train, y_train)
    # Raw, unrounded training scores (the next line prints them rounded).
    print(f1, acc)
    print("F1 score and accuracy score for training set: {:.4f} , {:.4f}.".format(
        f1, acc))

    f1, acc = predict_labels(clf, X_test, y_test)
    print("F1 score and accuracy score for test set: {:.4f} , {:.4f}.".format(
        f1, acc))

    # Persist the fitted classifier so it can be reloaded later.
    joblib.dump(clf, filename)


# Build an RBF-kernel SVC with a fixed random_state, then train, score and
# save it via train_predict.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in the
# visible part of this file; presumably the surrounding session supplies
# them -- confirm.
clf_B = SVC(kernel='rbf', random_state=912)
train_predict(clf_B, X_train, y_train, X_test, y_test)
	from sklearn.svm import SVC
	from time import time
	from sklearn.metrics import f1_score

	def train_classifier(clf, X_train, y_train):
		"""Fit ``clf`` on the training data and print how long the fit took.

		Args:
			clf: Estimator exposing a ``fit(X, y)`` method.
			X_train: Training features, passed unchanged to ``clf.fit``.
			y_train: Training targets, passed unchanged to ``clf.fit``.

		Returns:
			None. The return value of ``clf.fit`` is discarded; the elapsed
			wall-clock time is printed to stdout.
		"""
		# Start the clock, train the classifier, then stop the clock
		start = time()
		clf.fit(X_train, y_train)
		end = time()

		# Print the results
		print("Trained model in {:.4f} seconds".format(end - start))


	def predict_labels(clf, features, target):
		"""Predict with a fitted classifier and score the predictions.

		Args:
			clf: Estimator exposing a ``predict(X)`` method.
			features: Samples handed to ``clf.predict``.
			target: True labels the predictions are compared against.

		Returns:
			A ``(f1, accuracy)`` tuple: the micro-averaged F1 score computed
			over labels ``[0, 1]``, and the number of predictions equal to
			``target`` divided by the number of predictions.
		"""
		# Start the clock, make predictions, then stop the clock
		start = time()
		y_pred = clf.predict(features)
		end = time()

		# Print and return results
		print("Made predictions in {:.4f} seconds.".format(end - start))

		# NOTE(review): assumes ``target == y_pred`` compares element-wise
		# (e.g. NumPy/pandas inputs) so the sum counts matches -- confirm.
		return f1_score(target, y_pred, labels=[0, 1], average='micro'), sum(target == y_pred) / float(len(y_pred))


	def train_predict(clf, X_train, y_train, X_test, y_test,
			filename='finalized_model.sav'):
		"""Train a classifier, report its scores, and save the fitted model.

		Prints the classifier's class name and the training-set size, fits it
		with ``train_classifier``, prints the F1 and accuracy scores returned
		by ``predict_labels`` for the training and test sets, and finally
		dumps the classifier to ``filename`` with ``joblib.dump``.

		Args:
			clf: Classifier to fit and evaluate.
			X_train: Training features.
			y_train: Training targets.
			X_test: Test features.
			y_test: Test targets.
			filename: Path the fitted classifier is written to. Defaults to
				``'finalized_model.sav'``, the location that used to be
				hard-coded.

		Returns:
			None. Results are printed to stdout and the model is written to
			disk.
		"""
		# ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21
		# and removed in 0.23, so prefer the standalone package and fall back
		# to the vendored copy only on old installations. Importing before
		# training means a missing dependency fails fast.
		try:
			import joblib
		except ImportError:
			from sklearn.externals import joblib

		# Indicate the classifier and the training set size
		print("Training a {} using a training set size of {}. . .".format(
			clf.__class__.__name__, len(X_train)))

		# Train the classifier
		train_classifier(clf, X_train, y_train)

		# Print the results of prediction for both training and testing
		f1, acc = predict_labels(clf, X_train, y_train)
		# Raw, unrounded training scores (the next line prints them rounded).
		print(f1, acc)
		print("F1 score and accuracy score for training set: {:.4f} , {:.4f}.".format(
			f1, acc))

		f1, acc = predict_labels(clf, X_test, y_test)
		print("F1 score and accuracy score for test set: {:.4f} , {:.4f}.".format(
			f1, acc))

		# Persist the fitted classifier so it can be reloaded later.
		joblib.dump(clf, filename)


	# NOTE(review): these two statements repeat the unindented clf_B /
	# train_predict statements earlier in this file, and X_train, y_train,
	# X_test and y_test are not defined in the visible part of the file --
	# confirm whether this tab-indented copy is still needed.
	clf_B = SVC(kernel='rbf', random_state=912)
	train_predict(clf_B, X_train, y_train, X_test, y_test)