Skip to content

Instantly share code, notes, and snippets.

@nsakki55
Created June 8, 2022 11:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nsakki55/5997f9ba7bf4e43633eef7fb5d8e4ce9 to your computer and use it in GitHub Desktop.
Save nsakki55/5997f9ba7bf4e43633eef7fb5d8e4ce9 to your computer and use it in GitHub Desktop.
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.feature_extraction import FeatureHasher
# Names of the raw input columns, grouped below by their shared prefix.
# NOTE(review): nothing in the visible code reads this list; presumably the
# caller uses it to select columns before calling preprocess() -- confirm.
feature_columns = [
    "id", "hour", "C1", "banner_pos",
    # site_*
    "site_id", "site_domain", "site_category",
    # app_*
    "app_id", "app_domain", "app_category",
    # device_*
    "device_id", "device_ip", "device_model", "device_type", "device_conn_type",
    # C14 .. C21
    "C14", "C15", "C16", "C17", "C18", "C19", "C20", "C21",
]

# Name of the label column.
target = "click"
def preprocess(df: pd.DataFrame):
    """Hash every column of *df* into a fixed-width feature matrix.

    The ``hour`` column is parsed from its ``%y%m%d%H`` text form into
    timestamps, the hour of day is derived from it, and then every cell of
    the frame is converted to ``str`` and fed to a ``FeatureHasher`` with
    ``2 ** 24`` output features.

    The caller's frame is left untouched: the derived columns are built on
    a copy, so calling this function repeatedly on the same frame is safe.
    (Overwriting ``hour`` in place would leave timestamps behind that no
    longer match ``%y%m%d%H`` on the next call.)

    Args:
        df: Input rows. Must contain an ``hour`` column whose values, once
            converted to ``str``, match ``%y%m%d%H``. Every column present
            is hashed, so drop any column that must not become a feature
            (for example the label) before calling.

    Returns:
        The matrix produced by ``FeatureHasher``: one row per input row
        and ``2 ** 24`` columns.

    Raises:
        KeyError: If ``df`` has no ``hour`` column.
        ValueError: If an ``hour`` value does not match ``%y%m%d%H``.
    """
    # Vectorized parse; avoids one Python-level strptime call per row.
    timestamps = pd.to_datetime(df["hour"].astype(str), format="%y%m%d%H")

    # assign() returns a new frame, so the caller's data is not modified.
    # NOTE(review): despite its name, "day_of_week" holds the hour of day
    # (0-23). The value is kept as-is so the hashed output does not change;
    # confirm whether the weekday was actually intended.
    frame = df.assign(hour=timestamps, day_of_week=timestamps.dt.hour)

    # Column labels are dropped here: only the stringified cell values reach
    # the hasher, so equal values in different columns share a hashed feature.
    tokens = np.asanyarray(frame.astype(str))

    feature_hasher = FeatureHasher(n_features=2 ** 24, input_type="string")
    return feature_hasher.fit_transform(tokens)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment