Skip to content

Instantly share code, notes, and snippets.

@jeethu
Created Aug 6, 2022
Embed
What would you like to do?
"""
Remove Dangerous Features
https://forum.numer.ai/t/removing-dangerous-features/5627/16
"""
import numpy as np
import pandas as pd
# First of the two "dangerous" feature-name lists. remove_dangerous_features
# selects it when every one of these names is a column of the frame.
# NOTE(review): presumably corresponds to the "v3" dataset -- confirm.
V3 = frozenset(
    (
        "feature_base_ingrain_calligrapher",
        "feature_unvaried_social_bangkok",
        "feature_deliberative_connatural_kinetoscope",
        "feature_haziest_lifelike_horseback",
        "feature_accusatory_disinfectant_deportment",
        "feature_exorbitant_myeloid_crinkle",
        "feature_jerkwater_eustatic_electrocardiograph",
        "feature_undivorced_unsatisfying_praetorium",
        "feature_direst_interrupted_paloma",
        "feature_lofty_acceptable_challenge",
    )
)
# Second "dangerous" feature-name list. remove_dangerous_features falls back
# to it when the frame does not contain the full V3 list.
# NOTE(review): presumably corresponds to the "v4" dataset -- confirm.
V4 = frozenset(
    (
        "feature_palpebral_univalve_pennoncel",
        "feature_unsustaining_chewier_adnoun",
        "feature_brainish_nonabsorbent_assurance",
        "feature_coastal_edible_whang",
        "feature_disprovable_topmost_burrower",
        "feature_trisomic_hagiographic_fragrance",
        "feature_queenliest_childing_ritual",
        "feature_censorial_leachier_rickshaw",
        "feature_daylong_ecumenic_lucina",
        "feature_steric_coxcombic_relinquishment",
    )
)
def remove_dangerous_features(
    df: pd.DataFrame,
    copy: bool = True,
    *,
    replacement_value=np.nan,
) -> pd.DataFrame:
    """Overwrite every known dangerous feature column with a single value.

    The feature list is chosen by inspecting the columns of ``df``: the
    first of ``V3`` / ``V4`` whose names are all present is used.

    Args:
        df: Frame whose columns include the complete ``V3`` or ``V4`` list.
        copy: When true (the default) the work is done on ``df.copy()`` and
            ``df`` is left untouched; when false the columns of ``df``
            itself are overwritten.
        replacement_value: Value assigned to each dangerous column.

    Returns:
        The modified frame: a new object when ``copy`` is true, otherwise
        ``df`` itself.

    Raises:
        RuntimeError: If ``df`` is missing at least one name from ``V3``
            and at least one name from ``V4``.
    """
    # Subset test rather than counting matches: it builds no throwaway
    # list, and a repeated column label can no longer make the count
    # overshoot and misreport a valid frame as "Unknown dataset".
    for candidate in (V3, V4):
        if candidate.issubset(df.columns):
            bad_features = candidate
            break
    else:
        # Neither list is fully present in the frame.
        raise RuntimeError("Unknown dataset")

    result = df.copy() if copy else df
    # list(...) hands pandas a list of column labels to assign to.
    result[list(bad_features)] = replacement_value
    return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment