Skip to content

Instantly share code, notes, and snippets.

View Wann-Jiun's full-sized avatar

Wann-Jiun Ma Wann-Jiun

View GitHub Profile
from scipy.stats import skew

# Log-transform the strongly right-skewed numerical columns.
# Skewness is measured on the training frame only (NaNs dropped, values cast
# to float). Columns whose skewness exceeds 0.75 then get log1p applied in
# both the training and the test frame, so both receive the same transform.
skewed_features = train_df[numerical_features].apply(
    lambda column: skew(column.dropna().astype(float))
)
skewed_features = skewed_features[skewed_features > 0.75].index
train_df[skewed_features] = np.log1p(train_df[skewed_features])
test_df[skewed_features] = np.log1p(test_df[skewed_features])
from sklearn.preprocessing import StandardScaler

# Standardize the numerical columns of the training frame.
# The scaler is fitted separately from the transform call so the fitted
# `scaling` object stays available afterwards.
scaling = StandardScaler()
scaling.fit(train_df[numerical_features])
scaling_result = scaling.transform(train_df[numerical_features])

# Column i of the transformed result corresponds to numerical_features[i];
# write each scaled column back over the original one.
for i, col in enumerate(numerical_features):
    train_df[col] = scaling_result[:, i]
scaling_result = scaler.transform(test_df_munged[numeric_features])
for i, col in enumerate(numeric_features):
# Replace missing MasVnrArea values with 0.
# Assign the result back instead of calling fillna(inplace=True) on the
# column selection: the chained in-place form acts on an intermediate object
# and does not update train_df when pandas Copy-on-Write is active.
train_df["MasVnrArea"] = train_df["MasVnrArea"].fillna(0)
# Encode CentralAir as a float flag: 1.0 where the value is "Y", else 0.0.
all_df["CentralAir"] = df["CentralAir"].eq("Y").mul(1.0)
# One-hot encode `column_name`: build the indicator columns (named with the
# prefix "_" + column_name), attach them to train_df, and drop the original
# column that they replace.
dummies = pd.get_dummies(train_df[column_name], prefix="_" + column_name)
train_df = train_df.join(dummies).drop(columns=[column_name])
# Copy LotFrontage across, then fill its missing entries per neighborhood.
all_df["LotFrontage"] = df["LotFrontage"]

# NOTE(review): lot_frontage_by_neighborhood is defined outside this snippet;
# it is iterated as (key, group) pairs, presumably LotFrontage grouped by
# Neighborhood -- confirm against its definition.
for key, group in lot_frontage_by_neighborhood:
    # Rows in this neighborhood whose LotFrontage is missing in the source frame.
    idx = (df["Neighborhood"] == key) & (df["LotFrontage"].isnull())
    # Fill those rows with the median of the group.
    all_df.loc[idx, "LotFrontage"] = group.median()
# Integer flag: 1 where Electrical equals "SBrkr", 0 otherwise.
train_df["Is_Electrical_SBrkr"] = df["Electrical"].eq("SBrkr").mul(1)
# Collapse the ten OverallQual levels into three bands:
# 1-3 -> 1, 4-6 -> 2, 7-10 -> 3.
train_df["Aggregate_OverallQual"] = train_df["OverallQual"].replace(
    {
        1: 1, 2: 1, 3: 1,
        4: 2, 5: 2, 6: 2,
        7: 3, 8: 3, 9: 3, 10: 3,
    }
)
# Integer flag: 1 where YearRemodAdd equals YrSold, 0 otherwise.
train_df["New_House"] = train_df["YearRemodAdd"].eq(train_df["YrSold"]).mul(1)
# Kernel Ridge GridSearch
from sklearn.kernel_ridge import KernelRidge
kridge_grid = KernelRidge()
parameter_grid = {'alpha': [0.0001,0.001,0.01,0.1],
'degree': [1,2,3,4],
'kernel': ['polynomial']
#'n_estimators': [200,210,240,250],
#'min_child_weight': [1,2,3,4]