Last active
June 29, 2017 09:46
-
-
Save alexcasalboni/8acc123d64118813d4d87428aed2f5ea to your computer and use it in GitHub Desktop.
Let's build a scikit-learn model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sklearn
numpy
scipy
matplotlib
pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pickle | |
import gzip | |
from sklearn import svm | |
from sklearn.metrics import confusion_matrix | |
import numpy as np | |
import pandas as pd | |
def main(dataset_path='zipped_dataset.csv.gz', model_path='trained_model.pkl',
         train_fraction=0.7):
    """Train (or load a cached) model and print its test confusion matrix.

    The dataset is read as a gzip-compressed CSV. The first
    ``train_fraction`` of its rows are handed to ``load_model`` for training
    and the remaining rows are handed to ``test_model`` for evaluation.

    Args:
        dataset_path: path of the gzip-compressed CSV dataset.
        model_path: path of the cached model file used by ``load_model``.
        train_fraction: share of rows used for training; must lie strictly
            between 0 and 1 so that neither split is forced to be empty.

    Raises:
        ValueError: if ``train_fraction`` is not strictly between 0 and 1.
    """
    # Validate before touching the disk so a bad argument fails fast.
    if not 0 < train_fraction < 1:
        raise ValueError(
            'train_fraction must be strictly between 0 and 1, got %r'
            % (train_fraction,)
        )

    dataset = pd.read_csv(dataset_path, compression='gzip')
    # Index of the first test row: rows [0, N) train, rows [N, end) test.
    N = int(dataset.shape[0] * train_fraction)

    # NOTE: if model_path already exists, load_model returns the cached model
    # without retraining, whatever train_fraction is passed here.
    model = load_model(model_path, dataset, N)
    test_model(model, dataset, N)
def load_model(filename, dataset, N):
    """Return the cached model from ``filename``, training it first if absent.

    When ``filename`` exists it is read as a gzip-compressed pickle and the
    stored object is returned; ``dataset`` and ``N`` are not used in that
    case. Otherwise a model is trained via ``train_model(dataset, N)``,
    written to ``filename`` as a gzip-compressed pickle, and returned.

    Args:
        filename: path of the gzip-compressed pickle used as the cache.
        dataset: data forwarded to ``train_model`` when training is needed.
        N: number of training rows, forwarded to ``train_model``.

    Returns:
        The loaded or freshly trained model.
    """
    if os.path.isfile(filename):
        # SECURITY: unpickling executes arbitrary code embedded in the file;
        # only point this at cache files this script wrote itself.
        with gzip.open(filename, 'rb') as f:
            return pickle.load(f)

    model = train_model(dataset, N)

    # Write to a sibling temp file and rename it into place, so that an
    # interrupted dump never leaves a truncated cache that later runs would
    # try (and fail) to unpickle.
    tmp_name = filename + '.tmp'
    try:
        with gzip.open(tmp_name, 'wb') as f:
            pickle.dump(model, f)
        os.rename(tmp_name, filename)
    finally:
        # After a successful rename the temp file is gone; this only removes
        # leftovers from a failed write.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
    return model
def train_model(dataset, N):
    """Fit a fresh linear SVM on the first N rows of the dataset.

    Column 0 of those rows supplies the target labels and every other column
    is used as a feature. Returns the fitted ``svm.LinearSVC`` instance.
    """
    training_rows = dataset.iloc[:N]
    features = training_rows.iloc[:, 1:]
    labels = training_rows.iloc[:, 0]

    classifier = svm.LinearSVC()
    classifier.fit(features, labels)
    return classifier
def test_model(model, dataset, N):
    """Evaluate the model on the held-out rows and print a confusion matrix.

    Rows from index N onward form the test set: column 0 holds the true
    labels and the remaining columns are the features. The printed matrix is
    normalised per row, so entry (i, j) is the percentage (rounded to the
    nearest integer) of samples whose true class is i that were predicted as
    class j.

    Args:
        model: fitted estimator exposing ``predict(X)``.
        dataset: table indexed with ``.iloc`` (label in column 0).
        N: index of the first test row.
    """
    X_test = dataset.iloc[N:, 1:]
    y_test = dataset.iloc[N:, 0]
    y_pred = model.predict(X_test)

    # Builtin float replaces the np.float alias, which newer NumPy removed.
    counts = confusion_matrix(y_test, y_pred).astype(float)

    # keepdims=True keeps the per-row totals as a column vector, so each row
    # is divided by its own total. A flat (n,) vector would broadcast along
    # the columns and divide column j by the total of row j instead.
    row_totals = counts.sum(axis=1, keepdims=True)

    # A label that is predicted but never occurs in y_test yields an all-zero
    # row; leave it at 0 rather than dividing by zero.
    fractions = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals != 0,
    )

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(np.around(fractions * 100))
# Run the train/evaluate pipeline only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment