spencercarter/model_web_service_flask_post_p1.py

## model_web_service_flask_post_p1.py
# Part 1: Model

import sklearn as sk
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LassoCV
import pandas as pd
from flask import Flask
from flask import request
import requests
import pickle
from time import time

# Load the dataset returned by load_boston() (bound to the name `mass`); the
# features and target are moved into pandas objects below.
# NOTE(review): load_boston and LassoCV's `normalize` argument were removed in
# scikit-learn 1.2 -- confirm the pinned scikit-learn version before running.
mass = load_boston()

# Lowercase the predictor names and send the data to pandas
predictors = [var.lower() for var in mass.feature_names]
X = pd.DataFrame(mass['data'], columns=predictors)
y = pd.Series(mass['target']) # medv

# Configure a LASSO whose penalty strength is picked by 3-fold cross-validation
# over a grid of 10 candidate alphas; inputs are normalized by the estimator.
lasso = LassoCV(n_alphas=10, normalize=True, cv=3)

print("Fitting model...")
lasso.fit(X,y)

# score() is evaluated on the training data itself, so this is an in-sample
# figure; it is multiplied by 100 to print as a percentage.
print("Model fit. R-Squared = {0:0.2f}%".format(100*lasso.score(X,y)))

# A couple simple pickle functions
def pickle_me(obj, outfile):
    """Serialize ``obj`` into the file at ``outfile``.

    The file is opened in binary write mode (so an existing file is
    replaced) and the object is written with the highest pickle protocol
    the running interpreter supports. Returns ``None``.
    """
    with open(outfile, mode='wb') as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)

def unpickle_me(infile):
    """Return the object stored in the pickle file at ``infile``.

    The file is opened in binary read mode and closed again before the
    function returns.
    """
    with open(infile, mode='rb') as source:
        return pickle.load(source)

# Bundle the training frame, the fitted model, the column dtypes and the
# predictor names into one dict and write it to 'model_web_service.pickle'.
to_pickle = {'X':X, 'lasso':lasso, 'dtypes':X.dtypes, 'predictors':predictors}
pickle_me(to_pickle, 'model_web_service.pickle')

## model_web_service_flask_post_p2.py
# Part 2: Listener

# Repeat the setup block for the listener and sender
import sklearn as sk
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LassoCV
import pandas as pd
from flask import Flask
from flask import request
import requests
import pickle
from time import time

def unpickle_me(infile):
    """Return the object stored in the pickle file at ``infile``.

    The file is opened in binary read mode and closed again before the
    function returns.
    """
    with open(infile, mode='rb') as source:
        return pickle.load(source)

# Read in our pickled dictionary from 'model_web_service.pickle'
stored = unpickle_me('model_web_service.pickle')

# Pull the column dtypes, predictor names, data frame and fitted model
# back out of the stored dict so they are available at module level.
df_types = stored['dtypes']
predictors = stored['predictors']
X = stored['X']
lasso = stored['lasso']

# Next, create a scoring function that takes a JSON, and returns a model prediction.
def score_obs(indata, model=lasso, predictors=predictors, df_types=df_types):
    """
    Score a single observation and return the model's prediction.

    Parameters
    ----------
    indata : str or dict
        Either a JSON object string mapping variable names to values, or
        the already-decoded dict form of the same mapping.
    model : fitted estimator exposing ``predict``
        Defaults to the module-level ``lasso``.
    predictors : list of str
        Column names, in the order the model expects them.
    df_types : mapping of column name -> dtype
        Dtypes each predictor column is cast to before prediction.

    Returns
    -------
    The first (only) element of ``model.predict`` for the observation.

    Notes
    -----
    Keys not listed in ``predictors`` are dropped; predictors missing from
    ``indata`` are filled with 0.
    !!! In practice, you should build in error handling or imputation
    instead of silently zero-filling.
    """
    from io import StringIO  # local import: the file's import block is left untouched

    if isinstance(indata, dict):
        row = pd.Series(indata)
    else:
        # Wrap the text in a file-like object: passing a literal JSON string
        # straight to read_json is deprecated in recent pandas releases.
        row = pd.read_json(StringIO(indata), typ='series')

    # Impose the expected variable structure. reindex (unlike .loc with a list
    # of labels, which raises KeyError on missing labels in pandas >= 1.0)
    # drops extra columns and creates absent ones as NaN.
    data = row.to_frame().T.reindex(columns=predictors)

    # Fill the created/missing values so the model runs
    data = data.fillna(0)

    # Fix the dtypes
    data = data.astype({col: df_types[col] for col in data.columns})

    # predict returns an array, so grab the first value
    return model.predict(data.values.reshape(1, -1))[0]

# To make sure everything is working, compare a direct prediction against one from the function
print("Direct prediction: {} \nUsing this function: {}".format(lasso.predict(X.iloc[0:1])[0],score_obs(X.iloc[0].to_json())))

# Now the model is in memory, and we have a scoring function! Let's set up the web service.
app = Flask(__name__)

app.silent = True # Suppresses logging and errors. In practice, you'll likely want them, or import logging

@app.route("/mass", methods=['POST'])
def predict_medv():
    """
    POST /mass: score the JSON observation in the request body.

    Returns the prediction formatted to two decimal places. If the body is
    missing or is not valid JSON, or the observation cannot be scored, a
    plain-text message is returned with HTTP status 400 instead of letting
    the failure surface as a 500.
    """
    # silent=True makes get_json return None rather than raise on a missing
    # or malformed JSON body, so the bad-input case is handled in one place.
    payload = request.get_json(silent=True)
    if payload is None:
        return "Request body must be valid JSON", 400

    # Send the decoded JSON to our scoring function
    try:
        score = score_obs(payload)
    except (KeyError, TypeError, ValueError):
        return "Could not score the supplied observation", 400

    return "{0:0.2f}".format(score)

# Start Flask's built-in server on port 1234. This call blocks while the
# server is running, so the sender script has to be run from another process.
app.run(port=1234) # defaults to localhost. Use host= option to change

# At this point, the web service is up!

## model_web_service_flask_post_p3.py
# Part 3: Sender

# Repeat the setup block for the listener and sender
import sklearn as sk
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LassoCV
import pandas as pd
from flask import Flask
from flask import request
import requests
import pickle
from time import time

def unpickle_me(infile):
    """Return the object stored in the pickle file at ``infile``.

    The file is opened in binary read mode and closed again before the
    function returns.
    """
    with open(infile, mode='rb') as source:
        return pickle.load(source)

# Read in our pickled dictionary from 'model_web_service.pickle'
stored = unpickle_me('model_web_service.pickle')

# Pull the expected dtypes, predictor names, data frame and fitted model
# back out of the stored dict so they are available at module level.
df_types = stored['dtypes']
predictors = stored['predictors']
X = stored['X']
lasso = stored['lasso']


# We need to generate some test cases from our data. As you saw when creating the scoring function, we're going to turn our data into a JSON.
def make_a_post(indx, url="http://127.0.0.1:1234/mass", timeout=10):
    """
    POST one row of the module-level ``X`` to the scoring service.

    Parameters
    ----------
    indx : int
        Positional index of the row of ``X`` to send.
    url : str
        Endpoint to post to; defaults to the local listener.
    timeout : float
        Seconds to wait for the service before ``requests`` raises a
        timeout error. Without a timeout a stalled server would block the
        caller indefinitely.

    Returns
    -------
    float
        The prediction parsed from the response body, or 0.0 when the
        service answers with any status other than 200.
    """
    # The row is serialized to a JSON string first and then passed via json=,
    # so the listener receives a JSON-encoded string that it parses itself.
    payload = X.iloc[indx].to_json()
    resp = requests.post(url, json=payload, timeout=timeout)
    if resp.status_code != 200:
        return 0.0
    # The prediction arrives as bytes. Decode, then make float
    return float(resp.content.decode("utf-8"))


# Finally, let's send observation 0 to the web service. We know from earlier that it should return 30.505530469875787, rounded to 2 decimal places.
print(make_a_post(0))


# And a few more for show. Draw 10 row positions at random (np.random.choice samples with replacement by default, so repeats are possible):
for i in np.random.choice(range(X.shape[0]), 10):
    print("Obs {0:03d}: {1}".format(i, make_a_post(i)))


# And let's see how long these take to run. Take the average of `rep` runs:
lag = []
rep = 100
# Loop on `rep` (not a repeated literal) so the count printed below always
# matches the number of requests actually timed.
for _ in range(rep):
    t0 = time()
    make_a_post(0)
    lag.append(time()-t0)
print("Over {0} runs, the web service averaged {1:0.4f} seconds to score".format(rep,np.mean(lag)))

# **Success!**
	# Part 1: Model

	import sklearn as sk
	import numpy as np
	from sklearn.datasets import load_boston
	from sklearn.linear_model import LassoCV
	import pandas as pd
	from flask import Flask
	from flask import request
	import requests
	import pickle
	from time import time

	# Load the dataset returned by load_boston() (bound to the name `mass`); the
	# features and target are moved into pandas objects below.
	# NOTE(review): load_boston and LassoCV's `normalize` argument were removed in
	# scikit-learn 1.2 -- confirm the pinned scikit-learn version before running.
	mass = load_boston()

	# Lowercase the predictor names and send the data to pandas
	predictors = [var.lower() for var in mass.feature_names]
	X = pd.DataFrame(mass['data'], columns=predictors)
	y = pd.Series(mass['target']) # medv

	# Configure a LASSO whose penalty strength is picked by 3-fold cross-validation
	# over a grid of 10 candidate alphas; inputs are normalized by the estimator.
	lasso = LassoCV(n_alphas=10, normalize=True, cv=3)

	print("Fitting model...")
	lasso.fit(X,y)

	# score() is evaluated on the training data itself, so this is an in-sample
	# figure; it is multiplied by 100 to print as a percentage.
	print("Model fit. R-Squared = {0:0.2f}%".format(100*lasso.score(X,y)))

	# A couple simple pickle functions
	def pickle_me(obj, outfile):
		"""Serialize ``obj`` into the file at ``outfile``.

		The file is opened in binary write mode (so an existing file is
		replaced) and the object is written with the highest pickle protocol
		the running interpreter supports. Returns ``None``.
		"""
		# Body indentation restored: the flattened original was a syntax error.
		with open(outfile, 'wb') as f:
			pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
		return None

	def unpickle_me(infile):
		"""Return the object stored in the pickle file at ``infile``.

		The file is opened in binary read mode and closed again before the
		function returns.
		"""
		# Body indentation restored: the flattened original was a syntax error.
		with open(infile, 'rb') as f:
			unpickled = pickle.load(f)
		return unpickled

	# Bundle the training frame, the fitted model, the column dtypes and the
	# predictor names into one dict and write it to 'model_web_service.pickle'.
	to_pickle = {'X':X, 'lasso':lasso, 'dtypes':X.dtypes, 'predictors':predictors}
	pickle_me(to_pickle, 'model_web_service.pickle')
	# Part 2: Listener

	# Repeat the setup block for the listener and sender
	import sklearn as sk
	import numpy as np
	from sklearn.datasets import load_boston
	from sklearn.linear_model import LassoCV
	import pandas as pd
	from flask import Flask
	from flask import request
	import requests
	import pickle
	from time import time

	def unpickle_me(infile):
		"""Return the object stored in the pickle file at ``infile``.

		The file is opened in binary read mode and closed again before the
		function returns.
		"""
		# Body indentation restored: the flattened original was a syntax error.
		with open(infile, 'rb') as f:
			unpickled = pickle.load(f)
		return unpickled

	# Read in our pickled dictionary from 'model_web_service.pickle'
	stored = unpickle_me('model_web_service.pickle')

	# Pull the column dtypes, predictor names, data frame and fitted model
	# back out of the stored dict so they are available to the code below.
	df_types = stored['dtypes']
	predictors = stored['predictors']
	X = stored['X']
	lasso = stored['lasso']

	# Next, create a scoring function that takes a JSON, and returns a model prediction.
	def score_obs(indata, model=lasso, predictors=predictors, df_types=df_types):
		"""
		Score a single observation and return the model's prediction.

		Parameters
		----------
		indata : str or dict
			Either a JSON object string mapping variable names to values, or
			the already-decoded dict form of the same mapping.
		model : fitted estimator exposing ``predict``
			Defaults to the ``lasso`` loaded above.
		predictors : list of str
			Column names, in the order the model expects them.
		df_types : mapping of column name -> dtype
			Dtypes each predictor column is cast to before prediction.

		Returns
		-------
		The first (only) element of ``model.predict`` for the observation.

		Notes
		-----
		Keys not listed in ``predictors`` are dropped; predictors missing from
		``indata`` are filled with 0.
		!!! In practice, you should build in error handling or imputation
		instead of silently zero-filling.
		"""
		# Body indentation restored: the flattened original was a syntax error.
		from io import StringIO  # local import: the file's import block is left untouched

		if isinstance(indata, dict):
			row = pd.Series(indata)
		else:
			# Wrap the text in a file-like object: passing a literal JSON string
			# straight to read_json is deprecated in recent pandas releases.
			row = pd.read_json(StringIO(indata), typ='series')

		# Impose the expected variable structure. reindex (unlike .loc with a list
		# of labels, which raises KeyError on missing labels in pandas >= 1.0)
		# drops extra columns and creates absent ones as NaN.
		data = row.to_frame().T.reindex(columns=predictors)

		# Fill the created/missing values so the model runs
		data = data.fillna(0)

		# Fix the dtypes
		data = data.astype({col: df_types[col] for col in data.columns})

		# predict returns an array, so grab the first value
		return model.predict(data.values.reshape(1, -1))[0]

	# Sanity check: predict row 0 directly with the model, then push the same row
	# through score_obs as JSON; the two printed values are expected to agree.
	print("Direct prediction: {} \nUsing this function: {}".format(lasso.predict(X.iloc[0:1])[0],score_obs(X.iloc[0].to_json())))

	# Now the model is in memory, and we have a scoring function! Let's set up the web service.
	app = Flask(__name__)

	# Intended to suppress logging and errors. In practice, you'll likely want them (see the logging module).
	# NOTE(review): `silent` does not appear to be a documented Flask application
	# attribute, so this assignment may have no effect -- confirm.
	app.silent = True # Suppresses logging and errors. In practice, you'll likely want them, or import logging

	@app.route("/mass", methods=['POST'])
	def predict_medv():
		"""
		POST /mass: score the JSON observation in the request body.

		Returns the prediction formatted to two decimal places. If the body is
		missing or is not valid JSON, or the observation cannot be scored, a
		plain-text message is returned with HTTP status 400 instead of letting
		the failure surface as a 500.
		"""
		# Body indentation restored: the flattened original was a syntax error.
		# silent=True makes get_json return None rather than raise on a missing
		# or malformed JSON body, so the bad-input case is handled in one place.
		payload = request.get_json(silent=True)
		if payload is None:
			return "Request body must be valid JSON", 400

		# Send the decoded JSON to our scoring function
		try:
			score = score_obs(payload)
		except (KeyError, TypeError, ValueError):
			return "Could not score the supplied observation", 400

		return "{0:0.2f}".format(score)

	# Start Flask's built-in server on port 1234. This call blocks while the
	# server is running, so the sender script has to be run from another process.
	app.run(port=1234) # defaults to localhost. Use host= option to change

	# At this point, the web service is up!
	# Part 3: Sender

	# Repeat the setup block for the listener and sender
	import sklearn as sk
	import numpy as np
	from sklearn.datasets import load_boston
	from sklearn.linear_model import LassoCV
	import pandas as pd
	from flask import Flask
	from flask import request
	import requests
	import pickle
	from time import time

	def unpickle_me(infile):
		"""Return the object stored in the pickle file at ``infile``.

		The file is opened in binary read mode and closed again before the
		function returns.
		"""
		# Body indentation restored: the flattened original was a syntax error.
		with open(infile, 'rb') as f:
			unpickled = pickle.load(f)
		return unpickled

	# Read in our pickled dictionary from 'model_web_service.pickle'
	stored = unpickle_me('model_web_service.pickle')

	# Pull the expected dtypes, predictor names, data frame and fitted model
	# back out of the stored dict so they are available to the code below.
	df_types = stored['dtypes']
	predictors = stored['predictors']
	X = stored['X']
	lasso = stored['lasso']


	# We need to generate some test cases from our data. As you saw when creating the scoring function, we're going to turn our data into a JSON.
	def make_a_post(indx, url="http://127.0.0.1:1234/mass", timeout=10):
		"""
		POST one row of ``X`` to the scoring service.

		Parameters
		----------
		indx : int
			Positional index of the row of ``X`` to send.
		url : str
			Endpoint to post to; defaults to the local listener.
		timeout : float
			Seconds to wait for the service before ``requests`` raises a
			timeout error. Without a timeout a stalled server would block the
			caller indefinitely.

		Returns
		-------
		float
			The prediction parsed from the response body, or 0.0 when the
			service answers with any status other than 200.
		"""
		# Body indentation restored: the flattened original was a syntax error.
		# The row is serialized to a JSON string first and then passed via json=,
		# so the listener receives a JSON-encoded string that it parses itself.
		payload = X.iloc[indx].to_json()
		resp = requests.post(url, json=payload, timeout=timeout)
		if resp.status_code != 200:
			return 0.0
		# The prediction arrives as bytes. Decode, then make float
		return float(resp.content.decode("utf-8"))


	# Finally, let's send observation 0 to the web service. We know from earlier that it should return 30.505530469875787, rounded to 2 decimal places.
	print(make_a_post(0))


	# And a few more for show. Draw 10 row positions at random (np.random.choice samples with replacement by default, so repeats are possible):
	# Loop body indentation restored: the flattened original was a syntax error.
	for i in np.random.choice(range(X.shape[0]), 10):
		print("Obs {0:03d}: {1}".format(i, make_a_post(i)))


	# And let's see how long these take to run. Take the average of `rep` runs:
	lag = []
	rep = 100
	# Loop body indentation restored (the flattened original was a syntax error),
	# and the loop now runs on `rep` so the count printed below always matches
	# the number of requests actually timed.
	for _ in range(rep):
		t0 = time()
		make_a_post(0)
		lag.append(time()-t0)
	print("Over {0} runs, the web service averaged {1:0.4f} seconds to score".format(rep,np.mean(lag)))

	# Success!