Skip to content

Instantly share code, notes, and snippets.

View shaypal5's full-sized avatar
🐢
Working away...

Shay Palachy-Affek shaypal5

🐢
Working away...
View GitHub Profile
@shaypal5
shaypal5 / pdp_post_adv2.py
Last active August 1, 2022 17:36
An example for an advanced initialization of a complex pdpipe pipeline for processing pandas dataframes. 🐼🚿
>>> mp = MyPipelineAndModel(
savings_max_val=101,
drop_gender=False,
standardize=True,
ohencode_country=True,
savings_bin_val=1,
pca_threshold=25,
fit_intercept=True)
>>> mp
<PdPipeline -> LogisticRegression>
@shaypal5
shaypal5 / pdp_post_adv.py
Last active August 5, 2022 15:08
An example for an advanced initialization of a complex pdpipe pipeline for processing pandas dataframes. 🐼🚿
from typing import Optional
import pdpipe as pdp
from pdpipe import df
from sklearn.linear_model import LogisticRegression
from pdpipe.skintegrate import PdPipelineAndSklearnEstimator
class MyPipelineAndModel(PdPipelineAndSklearnEstimator):
def __init__(
self,
@shaypal5
shaypal5 / pdpipe_2nd_look.py
Last active July 9, 2022 16:06
Another minimal example of some pdpipe features.
>>> df = pd.DataFrame(
... [[23, 'Jo', 45], [19, 'Bo', 72], [15, 'Di', 12], [5, 'Jo', 0]],
... columns=['age', 'name', 'salary'])
>>> df
age name salary
0 23 Jo 45
1 19 Bo 72
2 15 Di 12
3 5 Jo 0
>>> pipeline = pdp.DropDuplicates('name').Bin({'salary': [0, 20, 50]}) \
@shaypal5
shaypal5 / funk_mf_recommender.py
Created July 1, 2022 10:34
A naive implementation of Funk's MF for collaborative filtering (commonly and wrongly called SVD for collaborative filtering). Might contain mistakes (let me know).
from typing import Tuple, Optional
import numpy as np
import pandas as pd
def train_val_split(
training_df: pd.DataFrame,
val_ratio: float,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""Splits the input training dataset into train/val set.
@shaypal5
shaypal5 / pdpipe_first_look.py
Last active June 27, 2022 13:16
pdpipe first look
>>> df = pd.DataFrame(
data=[[4, 165, 'USA'], [2, 180, 'UK'], [2, 170, 'Greece']],
index=['Dana', 'Jane', 'Nick'],
columns=['Medals', 'Height', 'Born']
)
>>> df
Medals Height Born
Dana 4 165 USA
Jane 2 180 UK
Nick 2 170 Greece
@shaypal5
shaypal5 / deepchecks-phishing-grad-boost-model-eval.py
Created January 16, 2022 21:06
Deepchecks Phishing URLs Example: Gradient Boosting Model Evaluation
from sklearn.ensemble import GradientBoostingClassifier

# Fit a gradient boosting classifier and run the evaluation suite on it.
# SEED, train_X, train_y, msuite, ds_train and ds_test are not defined in
# this snippet; they are expected to exist from earlier steps.
model = GradientBoostingClassifier(
    loss='exponential',
    n_estimators=250,
    max_depth=20,
    subsample=0.8,
    random_state=SEED,
)
model.fit(train_X, train_y)

msuite.run(
    model=model,
    train_dataset=ds_train,
    test_dataset=ds_test,
)
@shaypal5
shaypal5 / deepchecks-phishing-random-forest-model-eval.py
Created January 16, 2022 20:53
Deepchecks Phishing URLs Example: Random Forest Model Evaluation
from sklearn.tree import DecisionTreeClassifier

# Fit a tree classifier and run the evaluation suite on it.
# SEED, train_X, train_y, msuite, ds_train and ds_test are not defined in
# this snippet; they are expected to exist from earlier steps.
# NOTE(review): the gist is titled "Random Forest Model Evaluation", but the
# estimator fitted here is a single DecisionTreeClassifier - confirm which
# one is intended.
model = DecisionTreeClassifier(
    criterion='entropy',
    splitter='random',
    random_state=SEED,
)
model.fit(train_X, train_y)

msuite.run(
    model=model,
    train_dataset=ds_train,
    test_dataset=ds_test,
)
@shaypal5
shaypal5 / deepchecks-phishing-log-reg-model-eval.py
Last active January 16, 2022 18:30
Deepchecks Phishing URLs Example: Log Reg Model Evaluation
from deepchecks.suites import model_evaluation

# Create the model-evaluation suite and run it against the model.
# logreg, ds_train and ds_test are not defined in this snippet; they are
# expected to exist from earlier steps.
msuite = model_evaluation()
msuite.run(
    model=logreg,
    train_dataset=ds_train,
    test_dataset=ds_test,
)
@shaypal5
shaypal5 / deepchecks-phishing-first-train-test-val.py
Created January 16, 2022 18:18
Deepchecks Phishing URLs Example: First Train Test Validation Suite
# `from deepchecks.suites import ...` alone does not bind the name
# `deepchecks`, so import the package explicitly for `deepchecks.Dataset`.
import deepchecks
from deepchecks.suites import train_test_validation

# Create the train/test validation suite.
vsuite = train_test_validation()

# Wrap the train and test splits as deepchecks Datasets. train_X, train_y,
# test_X, test_y and logreg are not defined in this snippet; they are
# expected to exist from earlier steps.
ds_train = deepchecks.Dataset(
    df=train_X,
    label=train_y,
    set_datetime_from_dataframe_index=True,
    cat_features=[],
)
ds_test = deepchecks.Dataset(
    df=test_X,
    label=test_y,
    set_datetime_from_dataframe_index=True,
    cat_features=[],
)

# Run the suite on both datasets together with the model.
vsuite.run(
    model=logreg,
    train_dataset=ds_train,
    test_dataset=ds_test,
)
@shaypal5
shaypal5 / deepchecks-phishing-preprocessing.py
Last active January 16, 2022 18:10
Deepchecks Phishing URLs Example: Preprocessing
from deepchecks.datasets.classification.phishing import get_url_preprocessor

# Obtain the URL preprocessing pipeline. raw_train_df and raw_test_df are
# not defined in this snippet; they are expected to exist from earlier steps.
pipeline = get_url_preprocessor()

# Training split: fit_transform, then separate the 'target' column from the
# remaining columns.
train_df = pipeline.fit_transform(raw_train_df)
train_X, train_y = train_df.drop('target', axis=1), train_df['target']

# Test split: transform only (no fitting), then split the same way.
test_df = pipeline.transform(raw_test_df)
test_X, test_y = test_df.drop('target', axis=1), test_df['target']