Skip to content

Instantly share code, notes, and snippets.

View kvnkho's full-sized avatar
👋
Feel free to message me. Contact info in profile

Kevin Kho kvnkho

👋
Feel free to message me. Contact info in profile
View GitHub Profile
from fugue import transform
import pandas as pd
# Column spec string: a string "Model" column followed by eight float metric
# columns (Accuracy, AUC, Recall, Prec, F1, Kappa, MCC, TT_Sec).
# NOTE(review): presumably the output schema handed to fugue's transform()
# for wrapper() -- the call is not visible here, confirm against the full gist.
schema = """Model:str, Accuracy:float, AUC:float, Recall:float, Prec:float,
F1:float, Kappa:float, MCC:float, TT_Sec:float"""
def wrapper(df: pd.DataFrame) -> pd.DataFrame:
clf = setup(data = df,
target = 'Survived',
session_id=123,
import fugue_spark
# Column spec string: a string "Model" column, eight float metric columns
# (Accuracy, AUC, Recall, Prec, F1, Kappa, MCC, TT_Sec) and a trailing string
# "Sex" column.
# NOTE(review): presumably the output schema handed to fugue's transform()
# for wrapper() -- the call is not visible here, confirm against the full gist.
schema = """Model:str, Accuracy:float, AUC:float, Recall:float, Prec:float,
F1:float, Kappa:float, MCC:float, TT_Sec:float, Sex:str"""
def wrapper(df: pd.DataFrame) -> pd.DataFrame:
clf = setup(data = df,
target = 'Survived',
session_id=123,
silent = True,
from sklearn.model_selection import train_test_split

# Remove the columns that are not kept as features.
df = df.drop(columns=["Name", "PassengerId", "Ticket", "Cabin"])

# Replace the "Sex" values with the integer codes returned by pd.factorize.
df["Sex"] = pd.factorize(df["Sex"])[0]

# One-hot encode "Embarked" and swap the original column for the dummies.
# NOTE(review): the dummy columns are prefixed "Cabin" although they come
# from "Embarked" -- looks like a copy-paste slip; kept as-is, confirm.
dummy = pd.get_dummies(df["Embarked"], prefix="Cabin")
df = pd.concat([df.drop(columns="Embarked"), dummy], axis=1)

# Separate the target column from the feature columns.
y = df["Survived"]
X = df.drop(columns="Survived")

# Hold out 20% of the rows for testing; the seed is fixed at 42.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Mean of "Age" over the training rows only.
# NOTE(review): presumably used afterwards to fill missing ages -- that code
# is not visible here.
fill_age = X_train["Age"].mean()
from typing import Any
from sklearn.linear_model import LogisticRegression
def train_model(model: Any, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Object exposing ``fit``, ``get_params`` and, on the value
            returned by ``fit``, ``predict``.
        X_train: Features passed to ``model.fit``.
        X_test: Features passed to ``predict``.
        y_train: Targets passed to ``model.fit``.
        y_test: True targets compared against the predictions.

    Returns:
        A dict with the keys ``"model"`` (class name of ``model``),
        ``"params"`` (``model.get_params()``) and ``"accuracy"`` (the value
        of ``accuracy_score(y_test, predictions)``).
    """
    # NOTE(review): accuracy_score is not imported in the visible snippet;
    # presumably sklearn.metrics.accuracy_score -- confirm.
    fitted = model.fit(X_train, y_train)
    score = accuracy_score(y_test, fitted.predict(X_test))

    summary = {}
    summary["model"] = model.__class__.__name__
    summary["params"] = model.get_params()
    summary["accuracy"] = score
    return summary
# Evaluate a LogisticRegression built with its constructor defaults.
# NOTE(review): the returned dict is not assigned; presumably this ran as a
# notebook cell where the value is displayed -- confirm.
train_model(LogisticRegression(), X_train, X_test, y_train, y_test)
from sklearn.neighbors import KNeighborsClassifier
# Same evaluation for a KNeighborsClassifier built with constructor defaults.
train_model(KNeighborsClassifier(), X_train, X_test, y_train, y_test)
from prefect import task
@task(nout=4)
def create_data():
df = get_data("titanic")
df = df.drop(["Name", "PassengerId", "Ticket", "Cabin"], axis = 1)
df["Sex"] = pd.factorize(df["Sex"])[0]
dummy = pd.get_dummies(df['Embarked'], prefix='Cabin')
df = pd.concat([df.drop("Embarked", axis=1), dummy], axis = 1)
y = df["Survived"]
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
@task
def get_models():
return [LogisticRegression(random_state=42),
KNeighborsClassifier(), DecisionTreeClassifier(), SVC(),
@task
def train_model(model: Any, X_train, X_test, y_train, y_test):
    """Task: fit ``model`` on the training data and report test accuracy.

    Args:
        model: Object exposing ``fit``, ``get_params`` and, on the value
            returned by ``fit``, ``predict``.
        X_train: Features passed to ``model.fit``.
        X_test: Features passed to ``predict``.
        y_train: Targets passed to ``model.fit``.
        y_test: True targets compared against the predictions.

    Returns:
        A dict with the keys ``"model"`` (class name of ``model``),
        ``"params"`` (``model.get_params()``) and ``"accuracy"`` (the value
        of ``accuracy_score(y_test, predictions)``).
    """
    # NOTE(review): accuracy_score is not imported in the visible snippet;
    # presumably sklearn.metrics.accuracy_score -- confirm.
    estimator = model.fit(X_train, y_train)
    predictions = estimator.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    model_name = model.__class__.__name__
    model_params = model.get_params()
    return dict(model=model_name, params=model_params, accuracy=accuracy)
import prefect
@task
def get_results(results):
    """Task: collect ``results`` into a DataFrame, log it and return it.

    Args:
        results: Data handed directly to ``pd.DataFrame``.
            NOTE(review): presumably the list of dicts produced by the
            training task -- confirm against the flow definition.

    Returns:
        The ``pd.DataFrame`` built from ``results``.
    """
    summary = pd.DataFrame(results)
    # Emit the table through the logger exposed on prefect.context.
    run_logger = prefect.context.logger
    run_logger.info(summary)
    return summary