This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import *

# Register the Spark data frame as a temporary view named "boston" so that
# it can be referenced from Spark SQL queries.
# NOTE(review): `boston_sp` is created outside this snippet.
boston_sp.createOrReplaceTempView("boston")
# add train/test label and expand the data set by 3x (each num trees parameter) | |
full_df = spark.sql(""" | |
select * | |
from ( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LinearRegression
# `scipy.stats.stats` is a deprecated private namespace; `pearsonr` is part
# of the public `scipy.stats` API.
from scipy.stats import pearsonr

# Split into data and label arrays.
# NOTE(review): `boston_pd` is created outside this snippet; it must be a
# pandas DataFrame with a 'target' column.
y = boston_pd['target']
X = boston_pd.drop(['target'], axis=1)

# Create training (~80%) and test data sets: the first 400 rows are used for
# training and the remaining rows for testing (no shuffling is applied here).
X_train = X[:400]
X_test = X[400:]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load libraries
import flask
import pandas as pd
import tensorflow as tf
import keras
from keras.models import load_model

# Instantiate the Flask application object.
app = flask.Flask(__name__)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load Flask and create the application object.
import flask

app = flask.Flask(__name__)
# define a predict function as an endpoint | |
@app.route("/predict", methods=["GET","POST"]) | |
def predict(): | |
data = {"success": False} | |
# get the request parameters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import pandas, keras and tensorflow.
import pandas as pd
import tensorflow as tf
import keras
from keras import models, layers

# Load the sample data set (fetched over HTTP) and split it into feature (x)
# and label (y) data frames; 'label' is the target column.
df = pd.read_csv("https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv")
x = df.drop(['label'], axis=1)
y = df['label']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pull all data to the driver node as a single pandas data frame.
# NOTE(review): `spark_df`, `model` and `pd` are defined outside this snippet.
sample_df = spark_df.toPandas()

# Create a prediction for each user. The identifier and bookkeeping columns
# are dropped so that only feature columns are passed to the model.
ids = sample_df['user_id']
x_train = sample_df.drop(['label', 'user_id', 'partition_id'], axis=1)
pred = model.predict_proba(x_train)

# Column 1 of the predict_proba output is kept as the prediction --
# presumably the positive-class probability; confirm against model.classes_.
result_df = pd.DataFrame({'user_id': ids, 'prediction': pred[:, 1]})
# display the results |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train a model, but first pull everything to the driver node and drop the
# identifier/bookkeeping columns, which are not features.
# NOTE(review): `spark_df` and `LogisticRegression` come from outside this
# snippet.
df = spark_df.toPandas().drop(['user_id', 'partition_id'], axis=1)
y_train = df['label']
x_train = df.drop(['label'], axis=1)

# Use logistic regression with default settings.
model = LogisticRegression()
model.fit(x_train, y_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load pandas, sklearn, and pyspark types and functions.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import *
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the CSV (fetched over HTTP) with pandas, then convert it into a Spark
# data frame.
# NOTE(review): `pd` and the `spark` session are defined outside this snippet.
pandas_df = pd.read_csv(
    "https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv")
spark_df = spark.createDataFrame(pandas_df)

# Register the data frame as the temp view "spark_df" so that Spark SQL can
# be used to assign a user ID and a partition ID.
spark_df.createOrReplaceTempView("spark_df")
spark_df = spark.sql(""" | |
select *, user_id%10 as partition_id | |
from ( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import flask
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Load the sample data set (fetched over HTTP) and fit a logistic regression
# on every column except 'label', which is the target. This runs at module
# import time, before the Flask app object is created.
df = pd.read_csv("https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv")
model = LogisticRegression()
model.fit(df.drop(['label'], axis=1), df['label'])

# Create the Flask application object.
app = flask.Flask(__name__)