Skip to content

Instantly share code, notes, and snippets.

# Train a second random forest, this time on the augmented training set, and
# score it on the same X_test features used for the baseline model.
aug_rf = RandomForestClassifier().fit(X_aug_train, y_aug_train)
aug_rf_pred = aug_rf.predict(X_test)
from sklearn.ensemble import RandomForestClassifier
# Baseline model: a default-configured random forest trained on the raw
# (non-augmented) training split, then used to predict the held-out X_test.
base_rf = RandomForestClassifier().fit(X_raw_train, y_raw_train)
base_rf_pred = base_rf.predict(X_test)
# Build the augmented training set: the raw training rows followed by the
# synthetic rows held in ``synth_df``.
#
# ``DataFrame.append`` / ``Series.append`` were removed in pandas 2.0, so the
# rows are stacked with ``pd.concat`` instead.  The result is the same as the
# old ``append`` call: rows of the first object, then rows of the second, with
# the original index labels kept.
import pandas as pd

# Feature columns of the synthetic data (everything except the label).
X_synth = synth_df.loc[:, synth_df.columns != "FraudFound"]
X_aug_train = pd.concat([X_raw_train, X_synth])

# Matching labels, stacked in the same order as the features above.
y_synth = synth_df["FraudFound"]
y_aug_train = pd.concat([y_raw_train, y_synth])
# How many label-1 rows are missing for the training labels to contain as
# many 1s as 0s.  int() keeps this a plain Python integer.
num_to_gen = int((y_raw_train == 0).sum() - (y_raw_train == 1).sum())

# Ask the fitted synthesizer for that many records, passing a condition that
# restricts FraudFound to category 1, and convert the result to pandas.
synth_df = synthesizer.sample(
    n_samples=num_to_gen,
    condition_on={"FraudFound": {"categories": [1]}},
).to_pandas()
# Create the synthesizer and fit it on ``data`` together with its
# ``metadata``.  FraudFound is declared as a conditioning column at fit time.
synthesizer = RegularSynthesizer()
synthesizer.fit(
    data,
    metadata=metadata,
    condition_on=["FraudFound"],
)
from ydata.connectors import LocalConnector
from ydata.connectors.filetype import FileType
from ydata.synthesizers.regular.model import RegularSynthesizer
from ydata.labs import DataSources
from ydata.metadata import Metadata
# Read the training CSV through a LocalConnector and build the Metadata
# object from the loaded data.
connector = LocalConnector()
data = connector.read_file(
    path='car_claims_training.csv',
    file_type=FileType.CSV,
)
metadata = Metadata(data)
# Re-attach the label to the training features (as a new frame, leaving
# X_raw_train untouched) and write the combined table to disk without the
# index column.
car_claims_training = X_raw_train.assign(FraudFound=y_raw_train)
car_claims_training.to_csv('car_claims_training.csv', index=False)
from sklearn.model_selection import train_test_split
# Separate the label column from the feature columns.
y_raw = car_claims_prepared["FraudFound"]
X_raw = car_claims_prepared.drop(columns="FraudFound")

# Hold out 10% of the rows for testing; the fixed seed makes the split
# repeatable.
X_raw_train, X_test, y_raw_train, y_test = train_test_split(
    X_raw,
    y_raw,
    test_size=0.1,
    random_state=1,
)
import pandas as pd
# Load the raw claims table and run it through the two encoding steps in
# order: encode_fraud first, then encode_categorical on its result.
# NOTE(review): encode_fraud is defined elsewhere; presumably it makes the
# FraudFound label numeric -- confirm.
car_claims_raw = pd.read_csv('car_claims_raw.csv')
car_claims_raw_temp = car_claims_raw.pipe(encode_fraud)
car_claims_prepared = car_claims_raw_temp.pipe(encode_categorical)
from category_encoders import TargetEncoder
def encode_categorical(df):
    """Target-encode every object-dtype column of *df* against ``FraudFound``.

    Each object-dtype column is replaced by the output of a ``TargetEncoder``
    fitted on that single column with ``df['FraudFound']`` as the target.

    Args:
        df: DataFrame containing a ``FraudFound`` column and any number of
            object-dtype columns to encode.

    Returns:
        The same DataFrame object that was passed in; it is modified in
        place and also returned.
    """
    cols = df.select_dtypes('object').columns
    if cols.empty:
        # Nothing to encode: leave the frame untouched and skip building
        # an encoder altogether.
        return df

    te = TargetEncoder()
    for col in cols:
        # fit_transform re-fits the encoder on every call, so one instance
        # can be reused across columns.
        encoded = te.fit_transform(X=df[col], y=df['FraudFound'])
        # Plain column assignment swaps in the new column with its own
        # dtype.  `df.loc[:, col] = ...` would, on pandas >= 2.0, write into
        # the existing object column in place and leave it as object dtype.
        df[col] = encoded.iloc[:, 0]
    return df