Skip to content

Instantly share code, notes, and snippets.

# Reciprocal transform: 1 / age, computed element-wise.
df["age_reciprocal"] = df["age"].rdiv(1.0)
import numpy as np
import pandas as pd
from scipy import stats
# Box-Cox power transform - the input must be strictly positive.
# Without an explicit lmbda, boxcox returns (transformed data, fitted lambda);
# index 0 keeps only the transformed data.
df["sales"].pipe(stats.boxcox)[0]
# Natural log - for positive data with no zeroes
df["sales"].pipe(np.log)
# log(1 + x) - for positive data that may contain zeroes
df["sales"].pipe(np.log1p)
# Convert back - expm1 is the inverse of log1p; apply it to predictions
# when the target was log1p-transformed
df["sales"].pipe(np.expm1)
# Z-score standardisation: subtract the column mean, divide by the column
# standard deviation (pandas' default .std() is the sample std, ddof=1).
df["salary_zscore"] = df["salary"].sub(df["salary"].mean()).div(df["salary"].std())
# Min-max scaling: subtract the minimum and divide by the range, so the
# smallest salary maps to 0 and the largest to 1.
df["salary_minmax"] = df["salary"].sub(df["salary"].min()).div(
    df["salary"].max() - df["salary"].min()
)
# Order the rows by "variable", then take the first entry of every column
# within each "dimension" group.
# NOTE: GroupBy.first() skips nulls column by column, so a result row is not
# necessarily one intact source row when the data contains missing values.
(
    df.sort_values("variable")
    .groupby("dimension")
    .first()
)
# One-hot encode the frame. drop_first=True drops the first level of each
# encoded column, which avoids perfectly collinear dummy columns.
pd.get_dummies(data=df, drop_first=True)
# Equal-frequency binning: quartile code (0-3) of each "measure" value.
pd.qcut(df["measure"], q=4, labels=False)
# Numeric - equal-width binning: 4 same-width intervals over the observed
# range, returned as integer bin codes (labels=False).
pd.cut(df["measure"], bins=4, labels=False)
# Dimension - named age bands returned as a categorical.
# Intervals are right-closed: [0, 18] -> child, (18, 25] -> young adult,
# (25, 99] -> adult. include_lowest=True keeps age 0 inside the first band
# instead of turning it into NaN; ages outside 0-99 still come back as NaN.
pd.cut(
    df["age"],
    bins=[0, 18, 25, 99],
    labels=["child", "young adult", "adult"],
    include_lowest=True,
)
# Customer x product count matrix: one row per customer_id, one column per
# distinct "products" value, each cell the number of matching rows.
# fill_value=0 fills the absent customer/product pairs during the reshape, so
# the counts stay integers (unstack() followed by fillna(0) leaves them
# upcast to float because of the intermediate NaNs).
df.groupby("customer_id")["products"].value_counts().unstack(fill_value=0)
# Attach to every row the array of distinct products of that row's customer.
# The per-customer result is indexed by customer_id, so it is looked up
# through the customer_id column; assigning the grouped result directly would
# align it on df's row index and fill the column with NaN / mismatched values.
df["unique_products"] = df["customer_id"].map(
    df.groupby("customer_id")["products"].unique()
)
# Transform each element -> row - Pandas >= 0.25
df["unique_products"].explode()