Skip to content

Instantly share code, notes, and snippets.

@Muhammad4hmed
Last active March 24, 2021 20:58
Show Gist options
  • Save Muhammad4hmed/506e66435f8fee01ef3e67a02879b1f9 to your computer and use it in GitHub Desktop.
Save Muhammad4hmed/506e66435f8fee01ef3e67a02879b1f9 to your computer and use it in GitHub Desktop.
def create_date_featues(df, col):
    """Expand a date/time column into calendar and clock features.

    ``df[col]`` is parsed with ``pd.to_datetime`` and written back as a
    datetime column, then the feature columns below are added. The frame is
    modified in place and also returned.

    Added columns:
        Year, Month, Day, Dayofweek (pandas convention, Monday=0), DayOfyear,
        Week (ISO 8601 week number), Quarter, Is_month_start, Is_month_end,
        Is_quarter_start, Is_quarter_end, Is_year_start, Is_year_end,
        Semester (1 for quarters 1-2, otherwise 2), Is_weekend (1 when
        Dayofweek is 5 or 6), Is_weekday (1 when Dayofweek is 0-4),
        Days_in_month, Hour, Time (seconds elapsed since midnight).

    Args:
        df: DataFrame holding the column to expand; mutated in place.
        col: Name of the column containing dates or date-like strings.

    Returns:
        The same ``df`` object with the feature columns added.
    """
    # Parse once and reuse; the conversion is the expensive part when the
    # column holds strings.
    dates = pd.to_datetime(df[col])
    parts = dates.dt

    df['Year'] = parts.year
    df['Month'] = parts.month
    df['Day'] = parts.day
    df['Dayofweek'] = parts.dayofweek
    df['DayOfyear'] = parts.dayofyear

    # `Series.dt.week` was removed in pandas 2.0; `isocalendar().week` is the
    # replacement. It returns a nullable UInt32 column, so cast back to a
    # plain numeric dtype (float when missing dates are present) to stay
    # consistent with the other numeric features.
    iso_week = parts.isocalendar().week
    df['Week'] = iso_week.astype('float64' if iso_week.isna().any() else 'int64')

    df['Quarter'] = parts.quarter
    df['Is_month_start'] = parts.is_month_start
    df['Is_month_end'] = parts.is_month_end
    df['Is_quarter_start'] = parts.is_quarter_start
    df['Is_quarter_end'] = parts.is_quarter_end
    df['Is_year_start'] = parts.is_year_start
    df['Is_year_end'] = parts.is_year_end
    df['Semester'] = np.where(df['Quarter'].isin([1, 2]), 1, 2)
    df['Is_weekend'] = np.where(df['Dayofweek'].isin([5, 6]), 1, 0)
    df['Is_weekday'] = np.where(df['Dayofweek'].isin([0, 1, 2, 3, 4]), 1, 0)
    df['Days_in_month'] = parts.days_in_month
    df['Hour'] = parts.hour

    # Store the parsed datetimes back so callers get a real datetime column.
    df[col] = dates

    # Seconds since midnight, computed column-wise instead of looping over
    # every timestamp in Python.
    df['Time'] = (parts.hour * 60 + parts.minute) * 60 + parts.second
    return df
# Example usage: `data` is a DataFrame defined outside this snippet.
# Convert the 'date' column up front so the feature builder receives
# datetimes, then add the calendar/clock feature columns.
data['date'] = pd.to_datetime(data['date'])
data = create_date_featues(data,'date')
def add_lag_feature(weather_df, window=3, cols=None, group_col='site_id'):
    """Add per-group rolling mean/max/min/std columns to ``weather_df``.

    For every column in ``cols`` a rolling window of ``window`` rows is
    evaluated separately inside each ``group_col`` group, and four columns
    named ``{col}_mean_lag{window}``, ``{col}_max_lag{window}``,
    ``{col}_min_lag{window}`` and ``{col}_std_lag{window}`` are written to
    ``weather_df`` in place.

    The window is counted in rows, in the order the rows appear within each
    group; the frame is not sorted here.
    NOTE(review): presumably callers pass rows already ordered by time
    within each site -- confirm.

    Results are stored as ``float16`` to keep memory usage low, so values
    are rounded to roughly three significant digits.

    Args:
        weather_df: DataFrame containing ``group_col`` and every column in
            ``cols``; mutated in place.
        window: Rolling window length in rows.
        cols: Columns to summarize. Defaults to the standard weather
            measurement columns.
        group_col: Column whose values define the groups.

    Returns:
        None. The new columns are added to ``weather_df`` directly.
    """
    if cols is None:
        cols = ['air_temperature', 'cloud_coverage', 'dew_temperature',
                'precip_depth_1_hr', 'sea_level_pressure', 'wind_direction',
                'wind_speed']
    else:
        cols = list(cols)

    # Work on a positionally indexed copy of just the needed columns. The
    # grouped rolling result comes back ordered by group, not by original
    # row, so each value has to be mapped back to the row it was computed
    # for; a 0..n-1 index makes that mapping independent of whatever index
    # (unsorted, non-default, duplicated) the caller's frame carries.
    frame = weather_df[[group_col] + cols].reset_index(drop=True)
    rolled = frame.groupby(group_col)[cols].rolling(window=window, min_periods=0)

    def _in_row_order(stat):
        # Drop the group level, restore original row order (rows whose group
        # key is missing become NaN), and downcast.
        restored = stat.reset_index(level=0, drop=True).reindex(frame.index)
        return restored[cols].astype(np.float16)

    lag_mean = _in_row_order(rolled.mean())
    lag_max = _in_row_order(rolled.max())
    lag_min = _in_row_order(rolled.min())
    lag_std = _in_row_order(rolled.std())

    for col in cols:
        # Assign raw arrays so the write is positional, never label-aligned.
        weather_df[f'{col}_mean_lag{window}'] = lag_mean[col].to_numpy()
        weather_df[f'{col}_max_lag{window}'] = lag_max[col].to_numpy()
        weather_df[f'{col}_min_lag{window}'] = lag_min[col].to_numpy()
        weather_df[f'{col}_std_lag{window}'] = lag_std[col].to_numpy()
# Example usage: `weather_train_df` is a DataFrame defined outside this
# snippet. Add rolling statistics over a short (3-row) and a long (72-row)
# window; each call appends its own set of `*_lag{window}` columns in place.
# NOTE(review): windows are counted in rows -- presumably one row per hour,
# making these 3-hour and 72-hour windows; confirm against the data.
add_lag_feature(weather_train_df, window=3)
add_lag_feature(weather_train_df, window=72)
# Cyclic encoding of periodic features: project the value onto a circle so
# the last value of a cycle lands next to the first (e.g. hour 23 ends up
# adjacent to hour 0). The divisor is the length of the cycle -- 24 for
# hours here; per the original note, 365 or 30 for days and 12 for months.
# NOTE(review): `cos`, `sin`, `pi` and `hour` are not defined in this
# snippet -- presumably numpy/math functions and an hour-of-day value or
# column; confirm before running.
hour_x = cos(2*pi*hour/24)
hour_y = sin(2*pi*hour/24)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment