Skip to content

Instantly share code, notes, and snippets.

View bgweber's full-sized avatar

Ben Weber bgweber

View GitHub Profile
@bgweber
bgweber / pandasUDF.py
Last active May 19, 2022 09:19
Distributing Feature Generation with Pandas UDFs
import featuretools as ft
from pyspark.sql.functions import pandas_udf, PandasUDFType
@pandas_udf(schema, PandasUDFType.GROUPED_MAP)
def apply_feature_generation(pandasInputDF):
    """Build a featuretools entity set from one group of event rows.

    Registers ``pandasInputDF`` as the ``"events"`` entity of a new entity
    set and then splits out a ``"users"`` entity keyed by ``user_id``.

    NOTE(review): this listing ends after the entity set is built; no
    feature computation or return statement is visible here, so the
    function currently returns ``None``. Confirm against the full source.
    NOTE(review): ``entity_from_dataframe`` / ``normalize_entity`` look like
    the pre-1.0 featuretools API -- confirm the pinned library version.
    """
    # create Entity Set representation
    es = ft.EntitySet(id="events")
    es = es.entity_from_dataframe(entity_id="events", dataframe=pandasInputDF)
    es = es.normalize_entity(base_entity_id="events", new_entity_id="users", index="user_id")
import pandas as pd
# Load the skater stats CSV and keep only the rows for player 8467412.
df = pd.read_csv("game_skater_stats.csv")
df = df[df['player_id'] == 8467412]
print(df.head(3))

# Convert each remaining row into a plain dict of ints.
# NOTE(review): the listing ends here; `event` is built but not used in the
# visible code -- confirm where it is supposed to be sent.
for index, row in df.iterrows():
    event = { "playerID": int(row['player_id']), "Game_ID": int(row['game_id']),
              "goals": int(row['goals']), "assists": int(row['assists']),
              "shots": int(row['shots']), "hits": int(row['hits']) }
import fakeredis
import json
# Create a fakeredis server and a client attached to it, then print the client.
server = fakeredis.FakeServer()
redis = fakeredis.FakeStrictRedis(server=server)
print(redis)
# try fetching a record
# NOTE(review): nothing is written to the store in the visible code, so this
# lookup presumably finds no entry -- confirm what the client returns on a miss.
userID = 12345
record = redis.get(userID)
import flask
import fakeredis
import json
# Create a fakeredis server, a client attached to it, and the Flask application object.
server = fakeredis.FakeServer()
redis = fakeredis.FakeStrictRedis(server=server)
app = flask.Flask(__name__)
# endpoint for profile updates
@app.route("/update", methods=["GET","POST"])
# define a schema for the result set, the user ID and model prediction
# Two fields: 'user_id' (LongType) and 'prediction' (DoubleType). This schema
# is the first argument of the pandas_udf decorator that follows.
schema = StructType([StructField('user_id', LongType(), True),
StructField('prediction', DoubleType(), True)])
# define the Pandas UDF
@pandas_udf(schema, PandasUDFType.GROUPED_MAP)
def apply_model(sample_pd):
    """Apply the model to one group of rows.

    Args:
        sample_pd: the rows of a single group; must support
            ``sample_pd['user_id']`` lookup.

    NOTE(review): only the first step of the body is visible in this
    listing; the prediction and return statements are not shown, so the
    function currently returns ``None``. Confirm against the full source.
    """
    # run the model on the partitioned data set
    # The parameter is named `sample_pd`; the previous `sample_df` reference
    # was an undefined name and raised NameError on every call.
    ids = sample_pd['user_id']
import dash
from flask import Flask
from flask_dance.contrib.google import google as flask_google
from datetime import datetime
import dash_html_components as html
from dash_google_auth import GoogleOAuth
# Flask application object; the OAuth client id set below is a placeholder value.
server = Flask(__name__)
server.config["GOOGLE_OAUTH_CLIENT_ID"] = 'YOUR_CLIENT_ID'
from flask import Flask
from flask_httpauth import HTTPTokenAuth
# Flask application plus an HTTPTokenAuth handler configured with the 'Token' scheme.
app = Flask(__name__)
auth = HTTPTokenAuth(scheme='Token')
@auth.verify_token
def verify_token(token):
    """Return True when *token* equals the expected API token.

    Args:
        token: the credential extracted from the request; anything that is
            not a ``str`` is rejected.

    Returns:
        bool: ``True`` for an exact match, ``False`` otherwise.
    """
    # Local import keeps this block self-contained within the snippet.
    import hmac

    # NOTE(review): the expected token is hard-coded; consider loading it
    # from configuration or the environment instead.
    expected = '1234567890abcdefg'

    if not isinstance(token, str):
        return False

    # Constant-time comparison avoids leaking how many leading characters
    # matched. Compare bytes because compare_digest rejects non-ASCII str.
    return hmac.compare_digest(token.encode('utf-8'), expected.encode('utf-8'))
import requests
# Call the service on port 8000 with a token in the Authorization header and
# a JSON body of ten string flags (G1..G10), then show the response object
# followed by its raw text.
headers = {'Authorization': 'Token 1234567890abcdefg'}
payload = {
    'G1': '1', 'G2': '0', 'G3': '0', 'G4': '0', 'G5': '0',
    'G6': '0', 'G7': '0', 'G8': '0', 'G9': '0', 'G10': '0',
}
result = requests.post("http://localhost:8000", headers=headers, json=payload)
print(result)
print(result.text)
import requests
# Call the service on localhost with a JSON body of ten string flags
# (G1..G10), then show the response object followed by its decoded JSON.
payload = {
    'G1': '1', 'G2': '0', 'G3': '0', 'G4': '0', 'G5': '0',
    'G6': '0', 'G7': '0', 'G8': '0', 'G9': '0', 'G10': '0',
}
result = requests.post("http://localhost", json=payload)
print(result)
print(result.json())
import pandas as pd
from sklearn.linear_model import LogisticRegression
import flask
# Fetch the training data, fit a logistic regression with 'label' as the
# target and every other column as a feature, then create the Flask app.
df = pd.read_csv("https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv")
features = df.drop(['label'], axis=1)
target = df['label']
model = LogisticRegression()
model.fit(features, target)
app = flask.Flask(__name__)