title: Saving Machine Learning Models
author: Damian Mingle
date: 04/30/2018

Let's take a look at two conventional ways to save trained models with scikit-learn:

  1. as a pickle string
  2. as a pickled model saved to a file

Preliminaries

import pickle

from sklearn import datasets
from sklearn.linear_model import LogisticRegression
# Note: sklearn.externals.joblib was removed in scikit-learn 0.23+;
# on newer versions install joblib and use `import joblib` instead
from sklearn.externals import joblib

Load Data

# Load example data (iris dataset)
iris = datasets.load_iris()

# Create a matrix of features and a vector target
features, target = iris.data, iris.target
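As an optional sanity check (not part of the original gist), you can confirm the shapes of the feature matrix and target vector; iris ships with 150 samples and 4 numeric features.

# Optional check on the loaded data
print(features.shape)  # (150, 4)
print(target.shape)    # (150,)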

Train Model

# Train an example model (Logistic Regression)
clf = LogisticRegression(random_state=0)
clf.fit(features, target)  
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
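Before serializing, it can be worth a quick check that the model actually fit. This is an optional addition, not part of the original gist; the exact accuracy depends on the solver and scikit-learn version.

# Optional: training accuracy before saving (roughly 0.96 on iris with these settings)
print(clf.score(features, target))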

A Pickle String

# Save the trained model as a pickle string.
saved_model = pickle.dumps(clf)
# Display pickled model
saved_model
b'\x80\x03csklearn.linear_model.logistic\nLogisticRegression\nq\x00)\x81q\x01}q\x02(X\x06\x00\x00\x00solverq\x03X\t\x00\x00\x00liblinearq\x04X\n\x00\x00\x00intercept_q\x05cnumpy.core.multiarray\n_reconstruct\nq\x06cnumpy\nndarray\nq\x07K\x00\x85q\x08C\x01bq\t\x87q\nRq\x0b(K\x01K\x03\x85q\x0ccnumpy\ndtype\nq\rX\x02\x00\x00\x00f8q\x0eK\x00K\x01\x87q\x0fRq\x10(K\x03X\x01\x00\x00\x00<q\x11NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00tq\x12b\x89C\x18\xce\x86D\x03\xb1\xff\xd0?\xcd\xcc=I\xe5]\xf1?\xa9\'\xad\x8dxo\xf3\xbfq\x13tq\x14bX\x0c\x00\x00\x00random_stateq\x15K\x00X\x07\x00\x00\x00penaltyq\x16X\x02\x00\x00\x00l2q\x17X\n\x00\x00\x00warm_startq\x18\x89X\x0c\x00\x00\x00class_weightq\x19NX\x11\x00\x00\x00intercept_scalingq\x1aK\x01X\x01\x00\x00\x00Cq\x1bG?\xf0\x00\x00\x00\x00\x00\x00X\x08\x00\x00\x00max_iterq\x1cKdX\r\x00\x00\x00fit_interceptq\x1d\x88X\x0b\x00\x00\x00multi_classq\x1eX\x03\x00\x00\x00ovrq\x1fX\x07\x00\x00\x00n_iter_q h\x06h\x07K\x00\x85q!h\t\x87q"Rq#(K\x01K\x01\x85q$h\rX\x02\x00\x00\x00i4q%K\x00K\x01\x87q&Rq\'(K\x03h\x11NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00tq(b\x89C\x04\x07\x00\x00\x00q)tq*bX\x07\x00\x00\x00verboseq+K\x00X\x05\x00\x00\x00coef_q,h\x06h\x07K\x00\x85q-h\t\x87q.Rq/(K\x01K\x03K\x04\x86q0h\x10\x88C`\xa6\x1c\x904+\x8f\xda?\x8b7\xf6\x7f9\xaa\xda?.VL\xe5\x05R\xfb\xbf\xf3\xad\xd9^ya\xf7?\x95\x86\x10B\x03\x9d\xf9\xbf\x92\xa7x\xf5\\\x8c\xf8\xbf\x8b$8y\xdd\x18\x02\xc0\x8f\x8f\xee\xd9+|\xe2?X\x10\xf2\xcc\x8c\xc4\x03@\xda\xb0;l,w\xf0\xbf\xbb^\xe7W*+\xf6\xbf\xe2T`-lq\x04@q1tq2bX\x10\x00\x00\x00_sklearn_versionq3X\x06\x00\x00\x000.19.0q4X\x06\x00\x00\x00n_jobsq5K\x01X\x08\x00\x00\x00classes_q6h\x06h\x07K\x00\x85q7h\t\x87q8Rq9(K\x01K\x03\x85q:h\rX\x02\x00\x00\x00i4q;K\x00K\x01\x87q<Rq=(K\x03h\x11NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00tq>b\x89C\x0c\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00q?tq@bX\x04\x00\x00\x00dualqA\x89X\x03\x00\x00\x00tolqBG?\x1a6\xe2\xeb\x1cC-ub.'
# Load the pickled model
clf_from_pickle = pickle.loads(saved_model)

# Use the unpickled model to make predictions
clf_from_pickle.predict(features)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
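One way to confirm the round trip worked (an optional check, not in the original) is to compare the unpickled model's predictions against the original model's:

import numpy as np

# The unpickled model should reproduce the original model's predictions exactly
assert np.array_equal(clf.predict(features), clf_from_pickle.predict(features))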

A Pickled Model as a File

# Save the model as a pickle in a file
joblib.dump(clf, 'example_file.pkl') 
['example_file.pkl']
# Load the model from a pickled file
clf_from_joblib = joblib.load('example_file.pkl') 
# Use the loaded model to make predictions
clf_from_joblib.predict(features)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
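On recent scikit-learn releases, where sklearn.externals.joblib has been removed, the same workflow uses the standalone joblib package. A minimal sketch (the file name and compress level here are arbitrary choices, not from the original gist):

import joblib

# Persist the fitted model with compression and reload it
joblib.dump(clf, 'example_file_compressed.pkl', compress=3)
clf_reloaded = joblib.load('example_file_compressed.pkl')
print(clf_reloaded.predict(features[:5]))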