Skip to content

Instantly share code, notes, and snippets.

View Wann-Jiun's full-sized avatar

Wann-Jiun Ma Wann-Jiun

View GitHub Profile
from scipy.stats import skew
# Measure the skewness of every numerical training column (missing values
# dropped, remaining values cast to float) and keep the labels of the
# columns whose skewness exceeds 0.75.
feature_skewness = train_df[numerical_features].apply(
    lambda column: skew(column.dropna().astype(float))
)
skewed_features = feature_skewness[feature_skewness > 0.75].index

# Apply log(1 + x) to those columns in both frames. The column selection is
# derived from train_df only and reused unchanged for test_df.
for frame in (train_df, test_df):
    frame[skewed_features] = np.log1p(frame[skewed_features])
from sklearn.preprocessing import StandardScaler
# Standardise the numerical features. The scaler is fitted on the training
# frame only, and the same fitted transform is then applied to both frames.
scaling = StandardScaler()
scaling.fit(train_df[numerical_features])

# transform() returns a 2-D array whose columns follow the order of
# numerical_features, so each array column is written back by position.
scaling_result = scaling.transform(train_df[numerical_features])
for i, col in enumerate(numerical_features):
    train_df[col] = scaling_result[:, i]

# Reuse the scaler fitted above (never refit on test data).
# NOTE(review): assumes test_df is the frame to scale and that it carries
# the same numerical_features columns as train_df - confirm.
scaling_result = scaling.transform(test_df[numerical_features])
for i, col in enumerate(numerical_features):
    test_df[col] = scaling_result[:, i]
# Replace missing MasVnrArea values with 0. The filled column is assigned
# back to the frame: calling fillna(inplace=True) on a selected column is
# chained assignment, which does not update train_df when pandas
# copy-on-write is active.
train_df["MasVnrArea"] = train_df["MasVnrArea"].fillna(0)
# Start from the raw LotFrontage values, then fill every missing entry with
# the median of the group that matches the row's Neighborhood.
# NOTE(review): lot_frontage_by_neighborhood is defined outside this snippet;
# presumably LotFrontage grouped by Neighborhood - confirm.
all_df["LotFrontage"] = df["LotFrontage"]
for neighborhood, frontages in lot_frontage_by_neighborhood:
    missing_here = df["LotFrontage"].isnull() & (df["Neighborhood"] == neighborhood)
    all_df.loc[missing_here, "LotFrontage"] = frontages.median()
# Encode CentralAir as a float flag: 1.0 where the value is "Y", else 0.0.
all_df["CentralAir"] = df["CentralAir"].eq("Y").mul(1.0)
# Ordinal encoding shared by the quality/condition columns below; a missing
# grade is encoded as 0.
quality_dict = {None: 0, "Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5}

for quality_col in (
    "ExterQual",
    "ExterCond",
    "BsmtQual",
    "BsmtCond",
    "HeatingQC",
    "KitchenQual",
    "FireplaceQu",
    "GarageQual",
):
    grades = df[quality_col]
    # Missing grades are set to 0 explicitly rather than relying on the None
    # key matching NaN inside Series.map (NOTE(review): that matching is
    # believed to differ between pandas versions - confirm). Labels absent
    # from quality_dict still become NaN, so astype(int) keeps failing
    # loudly on unexpected values.
    train_df[quality_col] = (
        grades.map(quality_dict).where(grades.notna(), 0).astype(int)
    )
# One-hot encode train_df[column_name]: the indicator columns (prefixed with
# "_" + column_name) are joined onto the frame and the source column is
# dropped.
dummies = pd.get_dummies(train_df[column_name], prefix="_" + column_name)
train_df = train_df.join(dummies).drop(columns=[column_name])
# Integer flag: 1 where Electrical equals "SBrkr", otherwise 0.
train_df["Is_Electrical_SBrkr"] = df["Electrical"].eq("SBrkr").mul(1)
# Collapse the OverallQual scores into three tiers:
# 1-3 -> 1, 4-6 -> 2, 7-10 -> 3.
overall_qual_tiers = {
    **dict.fromkeys((1, 2, 3), 1),
    **dict.fromkeys((4, 5, 6), 2),
    **dict.fromkeys((7, 8, 9, 10), 3),
}
train_df["Aggregate_OverallQual"] = train_df["OverallQual"].replace(overall_qual_tiers)
# Integer flag: 1 where YearRemodAdd equals YrSold, otherwise 0.
train_df["New_House"] = train_df["YearRemodAdd"].eq(train_df["YrSold"]).mul(1)