Last active
November 16, 2018 13:59
-
-
Save spencercarter/b4a16e9924a6dab46c9dd604860444d3 to your computer and use it in GitHub Desktop.
Gists containing code to run the Models as a Web Service blog post
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Part 1: Model | |
import sklearn as sk | |
import numpy as np | |
from sklearn.datasets import load_boston | |
from sklearn.linear_model import LassoCV | |
import pandas as pd | |
from flask import Flask | |
from flask import request | |
import requests | |
import pickle | |
from time import time | |
# Load the Boston housing data bundled with scikit-learn (called `mass` here).
# NOTE(review): load_boston and LassoCV(normalize=...) are not available in
# recent scikit-learn releases -- confirm the version this script is pinned to.
mass = load_boston()

# Lowercase the predictor names, then move features and target into pandas
predictors = list(map(str.lower, mass.feature_names))
X = pd.DataFrame(mass.data, columns=predictors)
y = pd.Series(mass.target)  # medv

# 3-fold cross-validated LASSO searched over a path of 10 candidate alphas
lasso = LassoCV(n_alphas=10, normalize=True, cv=3)
print("Fitting model...")
lasso.fit(X, y)

# score() is evaluated on the training data itself, so this is in-sample R-squared
r_squared = lasso.score(X, y)
print("Model fit. R-Squared = {0:0.2f}%".format(100 * r_squared))
# A couple simple pickle functions | |
def pickle_me(obj, outfile):
    """Serialize ``obj`` to the file at ``outfile`` using the highest pickle protocol.

    The file is opened in binary write mode, so existing content is replaced.
    Returns None.
    """
    with open(outfile, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
def unpickle_me(infile):
    """Load and return the object stored in the pickle file at ``infile``.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserializing,
    so only use this on files that come from a trusted source.
    """
    with open(infile, 'rb') as f:
        return pickle.load(f)
# Bundle everything the listener and sender need into one dictionary:
# the training frame, the fitted model, the column dtypes and the column order.
to_pickle = {}
to_pickle['X'] = X
to_pickle['lasso'] = lasso
to_pickle['dtypes'] = X.dtypes
to_pickle['predictors'] = predictors
pickle_me(to_pickle, 'model_web_service.pickle')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Part 2: Listener | |
# Repeat the setup block for the listener and sender | |
import sklearn as sk | |
import numpy as np | |
from sklearn.datasets import load_boston | |
from sklearn.linear_model import LassoCV | |
import pandas as pd | |
from flask import Flask | |
from flask import request | |
import requests | |
import pickle | |
from time import time | |
def unpickle_me(infile):
    """Load and return the object stored in the pickle file at ``infile``.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserializing,
    so only use this on files that come from a trusted source.
    """
    with open(infile, 'rb') as f:
        return pickle.load(f)
# Read in our pickled dictionary
stored = unpickle_me('model_web_service.pickle')

# Pull the dtypes, column order, data and fitted model back into module-level names
df_types, predictors, X, lasso = (
    stored[key] for key in ('dtypes', 'predictors', 'X', 'lasso')
)
# Next, create a scoring function that takes a JSON, and returns a model prediction. | |
def score_obs(indata, model=lasso, predictors=predictors, df_types=df_types):
    """
    Scoring function. Takes a JSON of variables and values, runs them through
    the model and returns a prediction.

    Parameters
    ----------
    indata : str
        JSON object text mapping variable names to values for ONE observation.
    model : object with a ``predict`` method (defaults to the unpickled ``lasso``).
    predictors : list of column names, in the order the model expects them.
    df_types : mapping from column name to the dtype that column is cast to.

    Returns
    -------
    The first element of ``model.predict`` for the single-row input.
    """
    from io import StringIO  # local import: keeps this block independent of the file header

    # Always hand pandas a buffer. A bare string may be interpreted by read_json
    # as a file path or URL, which must never happen with web-request input
    # (and newer pandas deprecates passing literal JSON strings).
    source = StringIO(indata) if isinstance(indata, str) else indata
    row = pd.read_json(source, typ='series').to_frame().T

    # Impose the expected variable structure: reindex drops excess variables and
    # creates missing ones as NaN. (.loc with a list raises KeyError on missing
    # labels in pandas >= 1.0, so it cannot be used for this.)
    data = row.reindex(columns=predictors)

    # Fill created variables so the model runs.
    # !!! In practice, you should actually build in error handling or imputation
    data = data.fillna(0)

    # Fix the dtypes
    for c in data:
        data[c] = data[c].astype(df_types[c])

    # predict returns an array, so grab the first value
    return model.predict(data.values.reshape(1, -1))[0]
# Sanity check: the model called directly and the scoring function fed the same
# row as JSON should print the same prediction for observation 0.
direct_prediction = lasso.predict(X.iloc[0:1])[0]
function_prediction = score_obs(X.iloc[0].to_json())
print("Direct prediction: {} \nUsing this function: {}".format(direct_prediction, function_prediction))

# Now the model is in memory, and we have a scoring function! Let's set up the web service.
app = Flask(__name__)
# Intended to suppress logging and errors; in practice you'll likely want them, or import logging.
# NOTE(review): `silent` does not appear to be a documented Flask application
# attribute, so this assignment may have no effect -- confirm.
app.silent = True
@app.route("/mass", methods=['POST'])
def predict_medv():
    """Handle POST /mass: score the posted observation and return the prediction.

    The request body must be JSON whose decoded value is itself a string of
    JSON text (the sender in this file posts an already-serialized row via
    ``json=``), because ``score_obs`` parses JSON text.

    Returns the prediction formatted to two decimal places, or a 400 response
    when the payload is missing or is not a string.
    """
    # Get the JSON from the request
    payload = request.get_json()
    # Anything but a string cannot be parsed by the scoring function; reject it
    # explicitly instead of failing with an unhandled exception (HTTP 500).
    if not isinstance(payload, str):
        return "Request body must be a JSON-encoded string holding one observation", 400
    score = score_obs(payload)
    return "{0:0.2f}".format(score)
# Start the development server only when this file is executed as a script, so
# importing the module does not block. app.run() does not return until the
# server is stopped: the web service is up while this call is running.
if __name__ == "__main__":
    app.run(port=1234)  # defaults to localhost. Use host= option to change
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Part 3: Sender | |
# Repeat the setup block for the listener and sender | |
import sklearn as sk | |
import numpy as np | |
from sklearn.datasets import load_boston | |
from sklearn.linear_model import LassoCV | |
import pandas as pd | |
from flask import Flask | |
from flask import request | |
import requests | |
import pickle | |
from time import time | |
def unpickle_me(infile):
    """Load and return the object stored in the pickle file at ``infile``.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserializing,
    so only use this on files that come from a trusted source.
    """
    with open(infile, 'rb') as f:
        return pickle.load(f)
# Read in our pickled dictionary
stored = unpickle_me('model_web_service.pickle')

# Recover the expected dtypes, the column order, the data and the fitted model
df_types, predictors, X, lasso = (
    stored[key] for key in ('dtypes', 'predictors', 'X', 'lasso')
)
# We need to generate some test cases from our data. As you saw when creating the scoring function, we're going to turn our data into a JSON. | |
def make_a_post(indx, timeout=10):
    """POST row ``indx`` of ``X`` to the local scoring service and return its prediction.

    Parameters
    ----------
    indx : positional row index into ``X``.
    timeout : seconds to wait for the service before ``requests`` raises,
        so a stalled listener cannot hang the caller indefinitely.

    Returns
    -------
    float
        The prediction parsed from the response body, or 0.0 when the service
        answers with a non-200 status. NOTE: that failure value cannot be told
        apart from a genuine 0.0 prediction.
    """
    url = "http://127.0.0.1:1234/mass"
    payload = X.iloc[indx].to_json()
    # json= serializes the already-serialized row once more; the listener in
    # this file decodes that outer layer and parses the inner JSON text itself.
    resp = requests.post(url, json=payload, timeout=timeout)
    if resp.status_code != 200:
        return .0
    # The prediction arrives as bytes. Decode, then make float
    return float(resp.content.decode("utf-8"))
# Finally, let's toss observation 0 to the web service. We know from earlier that it should return 30.505530469875787, rounded to 2 decimal places.
print(make_a_post(0))

# And a few more for show. Let's sample over the observations:
for i in np.random.choice(range(X.shape[0]), 10):
    print("Obs {0:03d}: {1}".format(i, make_a_post(i)))

# And let's see how long these take to run. Take the average of `rep` runs:
lag = []
rep = 100
# Loop over `rep` (not a second hard-coded 100) so the count reported below
# always matches the number of requests actually timed.
for _ in range(rep):
    t0 = time()
    make_a_post(0)
    lag.append(time() - t0)
print("Over {0} runs, the web service averaged {1:0.4f} seconds to score".format(rep, np.mean(lag)))
# **Success!** |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment