Skip to content

Instantly share code, notes, and snippets.

View bgweber's full-sized avatar

Ben Weber bgweber

View GitHub Profile
# pandas-UDF helpers used to distribute scikit-learn training over Spark
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import *
# setup the spark data frame as a table
# NOTE(review): boston_sp is a Spark DataFrame created outside this excerpt
boston_sp.createOrReplaceTempView("boston")
# add train/test label and expand the data set by 3x (each num trees parameter)
# NOTE(review): the SQL string below is truncated in this excerpt — the closing
# triple quotes and the rest of the query lie outside the visible range
full_df = spark.sql("""
select *
from (
from sklearn.linear_model import LinearRegression
# FIX: scipy.stats.stats is a private alias that was deprecated and removed in
# modern SciPy; pearsonr is exported from the public scipy.stats namespace
from scipy.stats import pearsonr

# split into data and label arrays
# NOTE(review): boston_pd is a pandas DataFrame created outside this excerpt
y = boston_pd['target']
X = boston_pd.drop(['target'], axis=1)

# create training (~80%) and test data sets by row position
# (Boston housing has 506 rows, so the first 400 ≈ 79%; no shuffling is applied)
X_train = X[:400]
X_test = X[400:]
# Load libraries
import flask
import pandas as pd
import tensorflow as tf
import keras
from keras.models import load_model
# instantiate flask
# __name__ lets Flask locate templates/static files relative to this module
app = flask.Flask(__name__)
# load Flask
import flask
app = flask.Flask(__name__)

# define a predict function as an endpoint
# accepts both GET and POST so parameters may arrive via query string or body
@app.route("/predict", methods=["GET","POST"])
def predict():
    # response payload; flipped to True once a prediction succeeds
    data = {"success": False}
    # get the request parameters
    # NOTE(review): the function body is truncated in this excerpt — parameter
    # parsing and the model call lie outside the visible range
# import pandas, keras and tensorflow
import pandas as pd
import tensorflow as tf
import keras
from keras import models, layers

# load the sample data set, then separate the label column (y)
# from the feature columns (x)
df = pd.read_csv("https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv")
y = df['label']
x = df.drop(columns=['label'])
# pull all data to the driver node
# NOTE(review): toPandas() collects the entire Spark frame into driver memory
sample_df = spark_df.toPandas()
# create a prediction for each user
ids = sample_df['user_id']
# drop the label and bookkeeping columns so only model features remain
x_train = sample_df.drop(['label', 'user_id', 'partition_id'], axis=1)
# predict_proba returns per-class probabilities; column 1 is P(label == 1)
pred = model.predict_proba(x_train)
result_df = pd.DataFrame({'user_id': ids, 'prediction': pred[:,1]})
# display the results
# train a model, but first, pull everything to the driver node
# (user_id / partition_id are bookkeeping columns, not model features)
df = spark_df.toPandas().drop(columns=['user_id', 'partition_id'])
x_train = df.drop(columns=['label'])
y_train = df['label']
# use logistic regression
model = LogisticRegression()
model.fit(x_train, y_train)
# load pandas, sklearn, and pyspark types and functions
import pandas as pd
from sklearn.linear_model import LogisticRegression
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import *
# load the CSV as a Spark data frame
# (read with pandas first so Spark infers the schema from the pandas dtypes)
pandas_df = pd.read_csv(
    "https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv")
spark_df = spark.createDataFrame(pandas_df)
# assign a user ID and a partition ID using Spark SQL
spark_df.createOrReplaceTempView("spark_df")
# NOTE(review): the SQL string below is truncated in this excerpt — the closing
# triple quotes and the rest of the query lie outside the visible range
spark_df = spark.sql("""
select *, user_id%10 as partition_id
from (
@bgweber
bgweber / serving.py
Last active February 17, 2020 16:28
import pandas as pd
from sklearn.linear_model import LogisticRegression
import flask

# fetch the training data and fit the classifier once at startup,
# so the served model is ready before the first request arrives
df = pd.read_csv("https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv")
model = LogisticRegression()
model.fit(df.drop(columns=['label']), df['label'])

# create the Flask application object that will expose the model
app = flask.Flask(__name__)