This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pin the base image instead of :latest for reproducible builds (DL3007).
FROM ubuntu:22.04

# MAINTAINER is deprecated (DL4000); use an OCI-standard label instead.
LABEL org.opencontainers.image.authors="Ben Weber"

# Install Python in a single layer: combine update+install (DL3009),
# skip recommended packages (DL3015), and remove the apt lists in the
# same layer so they never bloat the image. Symlink with absolute paths
# rather than `cd` (DL3003).
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      python3-dev \
      python3-pip \
 && rm -rf /var/lib/apt/lists/* \
 && ln -s /usr/bin/python3 /usr/local/bin/python

# Install serving dependencies in one layer; --no-cache-dir keeps pip's
# download cache out of the image (DL3042).
# TODO(review): pin exact versions (flask==x.y.z, pandas==x.y.z) for
# reproducible builds (DL3013).
RUN pip3 install --no-cache-dir flask pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests

# Send one scoring request to the locally running model service.
# The payload is the ten binary game-ownership features the model was
# trained on (here: the user owns only G1).
# Fix: a request without `timeout` can hang forever if the service is
# down; backslash continuations are unnecessary inside parentheses.
result = requests.post(
    "http://localhost:5000",
    json={'G1': '1', 'G2': '0', 'G3': '0', 'G4': '0', 'G5': '0',
          'G6': '0', 'G7': '0', 'G8': '0', 'G9': '0', 'G10': '0'},
    timeout=10,
)
print(result)
print(result.json())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Connect to Google Cloud (Stackdriver) Logging and emit a test entry.
from google.cloud import logging

# NOTE(review): `credentials` must already be in scope here — it is
# created by the service-account snippet elsewhere in this file; confirm.
logging_client = logging.Client(project='serving-268422',
                                credentials=credentials)

# Named logger that entries will be grouped under in Stackdriver.
logger = logging_client.logger('model_service')

# Write a single text entry to the 'model_service' log.
logger.log_text('Hello World!')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Connect to the Cloud Monitoring service with a service-account key.
from google.cloud import monitoring_v3
from google.oauth2 import service_account
import time

# Load credentials from the local service-account JSON key file.
# NOTE(review): never bake 'serving.json' into a container image —
# mount it or use a secret manager.
credentials = service_account.Credentials.from_service_account_file('serving.json')
client = monitoring_v3.MetricServiceClient(credentials=credentials)

# Fix: `client.project_path()` was removed in google-cloud-monitoring v2;
# build the monitored-project resource name directly (identical string).
project_name = 'projects/serving-268422'

# create a custom metric
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from sklearn.linear_model import LogisticRegression
import flask

# Fetch the training set: binary game-ownership features plus a 'label'
# column indicating the target game.
games_url = "https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv"
df = pd.read_csv(games_url)

# Fit a logistic-regression model once at startup; features are every
# column except 'label', which is the target.
model = LogisticRegression()
model.fit(df.drop(['label'], axis=1), df['label'])

# Flask application that will serve predictions from `model`.
app = flask.Flask(__name__)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load pandas, sklearn, and pyspark types and functions | |
import pandas as pd | |
from sklearn.linear_model import LogisticRegression | |
from pyspark.sql.functions import pandas_udf, PandasUDFType | |
from pyspark.sql.types import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load the CSV as a Spark data frame | |
pandas_df = pd.read_csv( | |
"https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv") | |
spark_df = spark.createDataFrame(pandas_df) | |
# assign a user ID and a partition ID using Spark SQL | |
spark_df.createOrReplaceTempView("spark_df") | |
spark_df = spark.sql(""" | |
select *, user_id%10 as partition_id | |
from ( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train on the driver node: collect the whole Spark frame locally and
# drop the bookkeeping columns that are not model features.
df = spark_df.toPandas().drop(['user_id', 'partition_id'], axis=1)

# Split into features and target ('label' is the target column).
x_train = df.drop('label', axis=1)
y_train = df['label']

# Fit scikit-learn logistic regression on the collected data.
model = LogisticRegression()
model.fit(x_train, y_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Score every user on the driver node: collect the full data set locally.
sample_df = spark_df.toPandas()

# Keep the user IDs for the output frame, and strip the non-feature
# columns before scoring.
ids = sample_df['user_id']
x_train = sample_df.drop(['label', 'user_id', 'partition_id'], axis=1)

# predict_proba returns [P(neg), P(pos)] per row; keep the positive-class
# probability as the prediction.
pred = model.predict_proba(x_train)
result_df = pd.DataFrame({'user_id': ids, 'prediction': pred[:, 1]})
# display the results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Output schema for the grouped-map Pandas UDF: one row per user carrying
# the model's prediction (both fields nullable).
schema = StructType([
    StructField('user_id', LongType(), True),
    StructField('prediction', DoubleType(), True),
])
@pandas_udf(schema, PandasUDFType.GROUPED_MAP) | |
def apply_model(sample_pd): | |
# run the model on the partitioned data set | |
ids = sample_df['user_id'] |