Skip to content

Instantly share code, notes, and snippets.

View bgweber's full-sized avatar

Ben Weber bgweber

View GitHub Profile
# load Flask
import flask
# create the Flask application object for this module
app = flask.Flask(__name__)
# define a predict function as an endpoint
# NOTE(review): the handler body below lost its indentation and is truncated
# by the page scrape — `data = ...` must be indented under def predict() and
# the function needs a return statement; restore from the original gist.
@app.route("/predict", methods=["GET","POST"])
def predict():
data = {"success": False}
# get the request parameters
# (remainder of the handler is cut off in this capture)
# Load libraries
import flask
import pandas as pd
import tensorflow as tf
import keras
from keras.models import load_model
# instantiate flask
# create the Flask application object; endpoint routes are registered on it
app = flask.Flask(__name__)
# import pandas, keras and tensorflow
import pandas as pd
import tensorflow as tf
import keras
from keras import models, layers
# Download the sample games data set; "label" is the target column.
df = pd.read_csv("https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv")
y = df['label']
x = df.drop(columns=['label'])
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import *
# setup the spark data frame as a table
# NOTE(review): boston_sp is created in a later fragment of this capture
# (spark.createDataFrame(boston_pd)) — the scraped snippets are out of order.
boston_sp.createOrReplaceTempView("boston")
# add train/test label and expand the data set by 3x (each num trees parameter)
# NOTE(review): the SQL string below is truncated by the page scrape — the
# triple-quoted query is never closed; restore the full query from the gist.
full_df = spark.sql("""
select *
from (
# spark version
from pyspark.ml.regression import RandomForestRegressor
# define a function to train a RF model and return metrics
# NOTE(review): the function body below lost its indentation and is truncated
# by the page scrape — both statements must be indented under the def, and
# the metric computation / return statement is cut off in this capture.
def mllib_random_forest(trees, boston_train, boston_test):
# train a random forest regressor with the specified number of trees
rf = RandomForestRegressor(numTrees = trees, labelCol="target")
model = rf.fit(boston_train)
# sklearn version
from sklearn.ensemble import RandomForestRegressor as RFR
from multiprocessing.pool import ThreadPool
# worker pool: train at most five models at once
pool = ThreadPool(5)
# forest sizes (n_trees) to evaluate
parameters = [10, 20, 50]
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.ml.evaluation import RegressionEvaluator
# 10-fold cross-validation over the elastic-net mixing parameter
# (0 = pure L2 ridge, 1 = pure L1 lasso), scored by R^2 on "target".
# NOTE(review): LinearRegression is imported in a later fragment of this
# capture — the scraped snippets are out of order.
crossval = CrossValidator(estimator=LinearRegression(labelCol = "target"),
estimatorParamMaps=ParamGridBuilder().addGrid(
LinearRegression.elasticNetParam, [0, 0.5, 1.0]).build(),
evaluator=RegressionEvaluator(
labelCol = "target", metricName = "r2"),
numFolds=10)
# linear regression with Spark MLlib
from pyspark.ml.regression import LinearRegression
# elastic-net regularized linear regression: 10 iterations, lambda = 0.1,
# even mix of L1/L2 penalties; "target" is the label column
# NOTE(review): boston_train / boston_test are created in a later fragment
# of this capture — the scraped snippets are out of order.
lr = LinearRegression(maxIter=10, regParam=0.1,
elasticNetParam=0.5, labelCol="target")
# Fit the model on the training split, then score the held-out split
model = lr.fit(boston_train)
boston_pred = model.transform(boston_test)
from pyspark.ml.feature import VectorAssembler
# convert to a Spark data frame
# NOTE(review): boston_pd (presumably a pandas DataFrame of the Boston
# housing data) is not defined in this capture; display() is a Databricks
# notebook built-in, not standard Python.
boston_sp = spark.createDataFrame(boston_pd)
display(boston_sp.take(5))
# split into training and test spark data frames
# NOTE(review): this slices by row position — the first 400 rows train and
# the remainder test; the split is not randomized.
boston_train = spark.createDataFrame(boston_pd[:400])
boston_test = spark.createDataFrame(boston_pd[400:])
from sklearn.linear_model import LinearRegression
from scipy.stats.stats import pearsonr
# Separate the label column from the feature matrix.
# (boston_pd is defined elsewhere in the original gist.)
X = boston_pd.drop(columns=['target'])
y = boston_pd['target']
# Hold out the first 400 rows for training (~80%), the rest for testing.
X_train, X_test = X[:400], X[400:]