Skip to content

Instantly share code, notes, and snippets.

@spencercarter
Last active November 16, 2018 13:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save spencercarter/b4a16e9924a6dab46c9dd604860444d3 to your computer and use it in GitHub Desktop.
Save spencercarter/b4a16e9924a6dab46c9dd604860444d3 to your computer and use it in GitHub Desktop.
Gists containing code to run the Models as a Web Service blog post
# Part 1: Model
import sklearn as sk
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LassoCV
import pandas as pd
from flask import Flask
from flask import request
import requests
import pickle
from time import time
# Load the Boston (Massachusetts) housing data bundled with scikit-learn.
# NOTE(review): load_boston and LassoCV's normalize= argument appear to have been
# removed in newer scikit-learn releases -- confirm the pinned version.
mass = load_boston()
# Lower-case the feature names; they become the DataFrame's column labels
predictors = [name.lower() for name in mass.feature_names]
X = pd.DataFrame(mass.data, columns=predictors)
y = pd.Series(mass.target)  # medv
# Fit a LASSO whose penalty is picked by 3-fold CV over a path of 10 candidate alphas
lasso = LassoCV(cv=3, n_alphas=10, normalize=True)
print("Fitting model...")
lasso.fit(X, y)
print("Model fit. R-Squared = {0:0.2f}%".format(100 * lasso.score(X, y)))
# A couple simple pickle functions
def pickle_me(obj, outfile):
    """Serialize ``obj`` to the file at ``outfile`` using the highest pickle protocol.

    The file is opened in binary write mode, so existing content at that path
    is replaced. Returns None.
    """
    with open(outfile, mode='wb') as sink:
        pickle.dump(obj, sink, pickle.HIGHEST_PROTOCOL)
def unpickle_me(infile):
    """Load and return the object pickled in the file at ``infile``.

    Only use this on files you trust: unpickling can execute arbitrary code.
    """
    with open(infile, mode='rb') as source:
        return pickle.load(source)
# Bundle the training frame, the fitted model, and the column metadata into a
# single dictionary so the later parts can restore everything from one file
to_pickle = dict(X=X, lasso=lasso, dtypes=X.dtypes, predictors=predictors)
pickle_me(to_pickle, 'model_web_service.pickle')
# Part 2: Listener
# Repeat the setup block for the listener and sender
import sklearn as sk
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LassoCV
import pandas as pd
from flask import Flask
from flask import request
import requests
import pickle
from time import time
def unpickle_me(infile):
    """Load and return the object pickled in the file at ``infile``.

    Only use this on files you trust: unpickling can execute arbitrary code.
    """
    with open(infile, mode='rb') as source:
        return pickle.load(source)
# Restore the dictionary that the model-building part pickled
stored = unpickle_me('model_web_service.pickle')
# Pull the fitted model, the training data, and the column metadata back out
lasso = stored['lasso']
X = stored['X']
predictors = stored['predictors']
df_types = stored['dtypes']
# Next, create a scoring function that takes a JSON, and returns a model prediction.
def score_obs(indata, model=lasso, predictors=predictors, df_types=df_types):
    """
    Scoring function. Takes a JSON string of variables and values for one
    observation, runs it through the model and returns a prediction.

    Parameters
    ----------
    indata : str
        JSON object mapping variable names to values.
    model : optional
        Object whose ``predict`` method is called on the 1 x p array.
    predictors : list of str, optional
        Column names, in the order they are passed to the model.
    df_types : mapping of column name -> dtype, optional
        Dtype each predictor column is cast to before scoring.

    Returns
    -------
    The first element of the array returned by ``model.predict``.
    """
    from io import StringIO  # local import keeps this block self-contained

    # Wrap the text in a buffer: newer pandas deprecates passing a literal JSON string
    observation = pd.read_json(StringIO(indata), typ='series')
    # Impose the expected variable structure. reindex drops excess variables and
    # creates absent ones as NaN; label-based .loc[:, predictors] raises KeyError
    # for absent labels in pandas >= 1.0.
    data = observation.to_frame().T.reindex(columns=predictors)
    # Fill the created variables so the model runs
    data = data.fillna(0)  # !!! In practice, you should actually build in error handling or imputation
    # Fix the dtypes
    data = data.astype({col: df_types[col] for col in data.columns})
    # predict returns an array, so grab the first value
    return model.predict(data.values.reshape(1, -1))[0]
# Sanity check: the scoring function should reproduce a direct model prediction
print(
    "Direct prediction: {} \nUsing this function: {}".format(
        lasso.predict(X.iloc[:1])[0],
        score_obs(X.iloc[0].to_json()),
    )
)
# The model and the scoring function are in memory; now create the Flask application for the web service.
app = Flask(__name__)
# NOTE(review): `silent` does not appear to be a documented Flask application attribute,
# so this assignment may have no effect -- confirm, or configure `logging` instead.
app.silent = True # Meant to suppress logging and errors. In practice, you'll likely want them, or import logging
@app.route("/mass", methods=['POST'])
def predict_medv():
    """Score the observation in the POST body and return the prediction as text.

    The body may be a JSON-encoded string (what the sender in this file posts)
    or a plain JSON object; either way JSON text is forwarded to ``score_obs``.

    Returns
    -------
    The prediction formatted to two decimal places, or a 400 response when the
    body is missing, is not valid JSON, or cannot be scored.
    """
    import json  # local import: only needed to re-encode object payloads

    # silent=True yields None for an unparseable body instead of raising
    payload = request.get_json(silent=True)
    if isinstance(payload, dict):
        # score_obs expects JSON text, so turn a parsed object back into a string
        payload = json.dumps(payload)
    if not isinstance(payload, str):
        return "Request body must be JSON holding the predictor values", 400
    try:
        score = score_obs(payload)
    except ValueError:
        # Malformed JSON text or values that cannot be cast to the expected dtypes
        return "Could not score the supplied observation", 400
    return "{0:0.2f}".format(score)
# Start the web service on port 1234
app.run(port=1234) # defaults to localhost. Use host= option to change
# At this point, the web service is up!
# Part 3: Sender
# Repeat the setup block for the listener and sender
import sklearn as sk
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LassoCV
import pandas as pd
from flask import Flask
from flask import request
import requests
import pickle
from time import time
def unpickle_me(infile):
    """Load and return the object pickled in the file at ``infile``.

    Only use this on files you trust: unpickling can execute arbitrary code.
    """
    with open(infile, mode='rb') as source:
        return pickle.load(source)
# Restore the dictionary that the model-building part pickled
stored = unpickle_me('model_web_service.pickle')
# Pull the fitted model and the data we will draw test cases from
lasso = stored['lasso']
X = stored['X']
# Keep the column names and the data types we expect, to ensure typing works
predictors = stored['predictors']
df_types = stored['dtypes']
# We need to generate some test cases from our data. As you saw when creating the scoring function, we're going to turn our data into a JSON.
def make_a_post(indx, url="http://127.0.0.1:1234/mass", timeout=10):
    """Send observation ``indx`` of ``X`` to the scoring service and return its prediction.

    Parameters
    ----------
    indx : int
        Positional row index into the module-level ``X``.
    url : str, optional
        Endpoint to POST to; defaults to the local listener on port 1234.
    timeout : float, optional
        Seconds to wait for the service. Without a timeout ``requests`` can
        wait indefinitely on an unresponsive server.

    Returns
    -------
    float
        The service's prediction, or 0.0 when the response status is not 200.
    """
    # Serialize the row to a JSON string; requests then sends that string as the JSON body
    data = X.iloc[indx].to_json()
    req = requests.post(url, json=data, timeout=timeout)
    if req.status_code != 200:
        return 0.0
    # The prediction will come in as bytes. Decode, then make float
    return float(req.content.decode("utf-8"))
# Finally, send observation 0 to the web service. From the earlier check it should return 30.505530469875787, rounded to 2 decimal places.
print(make_a_post(0))
# And a few more for show: score 10 randomly sampled observations
for obs_idx in np.random.choice(X.shape[0], 10):
    print("Obs {0:03d}: {1}".format(obs_idx, make_a_post(obs_idx)))
# And let's see how long these take to run. Take the average of `rep` runs:
rep = 100
lag = []
# Loop count is tied to `rep` so the run count reported below always matches
for _ in range(rep):
    t0 = time()
    make_a_post(0)
    lag.append(time() - t0)
print("Over {0} runs, the web service averaged {1:0.4f} seconds to score".format(rep, np.mean(lag)))
# **Success!**
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment