Skip to content

Instantly share code, notes, and snippets.

@superMDguy
Last active February 24, 2018 20:47
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save superMDguy/72689a11746079677ddb0d19f26443a1 to your computer and use it in GitHub Desktop.
Save superMDguy/72689a11746079677ddb0d19f26443a1 to your computer and use it in GitHub Desktop.
Adds weather data to the dataset for the Kaggle "Recruit Restaurant Visitor Forecasting" competition.
# Convert the visit dates to datetimes; add_weather() joins them against
# weather files whose dates are parsed as datetimes as well.
train['visit_date'] = pd.to_datetime(train['visit_date'])
test['visit_date'] = pd.to_datetime(test['visit_date'])
def add_weather(dataset, weather_root='../../data/raw/weather/'):
    """Return a copy of *dataset* with daily weather columns attached.

    For every distinct ``air_store_id`` in *dataset*, the nearest weather
    station is looked up in ``air_store_info_with_nearest_active_station.csv``
    and that station's CSV is joined to the store's rows on ``visit_date``.

    Args:
        dataset: DataFrame with an ``air_store_id`` column and a
            ``visit_date`` column. ``visit_date`` should already be a
            datetime column, because the weather dates it is joined against
            are parsed as datetimes.
        weather_root: Directory holding the station lookup CSV and the
            ``1-1-16_5-31-17_Weather`` folder of per-station CSV files.
            The default matches the previously hard-coded location.

    Returns:
        A new DataFrame (the input is not modified) with the same index and
        rows as *dataset* plus ``precipitation`` and ``avg_temperature``
        columns. Rows whose date has no weather record hold NaN.

    Raises:
        IndexError: If a store id has no entry in the station lookup file.
    """
    import os

    print('Adding weather...')

    air_nearest = pd.read_csv(os.path.join(
        weather_root, 'air_store_info_with_nearest_active_station.csv'))
    weather_dir = os.path.join(weather_root, '1-1-16_5-31-17_Weather')
    weather_keep_columns = ['precipitation', 'avg_temperature']

    dataset_with_weather = dataset.copy()
    for column in weather_keep_columns:
        dataset_with_weather[column] = np.nan

    for air_id in dataset.air_store_id.unique():
        station = air_nearest[air_nearest.air_store_id == air_id].station_id.iloc[0]
        weather_data = pd.read_csv(
            os.path.join(weather_dir, station + '.csv'),
            parse_dates=['calendar_date'],
        ).rename(columns={'calendar_date': 'visit_date'})

        # Keep only what is needed and at most one record per date, so the
        # left merge below yields exactly one output row per dataset row.
        weather_data = weather_data[['visit_date'] + weather_keep_columns]
        weather_data = weather_data.drop_duplicates('visit_date')

        this_store = dataset.air_store_id == air_id
        # Merge only the date column: avoids suffix clashes if the dataset
        # already carries a column named like one of the weather columns.
        merged = dataset.loc[this_store, ['visit_date']].merge(
            weather_data, on='visit_date', how='left')

        for column in weather_keep_columns:
            # merge() returns a fresh 0..k-1 index, so assigning the Series
            # directly would align on labels and scatter/lose the values.
            # A left merge preserves the left row order, so assign by
            # position instead.
            dataset_with_weather.loc[this_store, column] = merged[column].values

    return dataset_with_weather
# Attach the weather columns to both splits. Each call prints a progress
# line and returns a new frame, which is rebound to the original name.
train = add_weather(train)
test = add_weather(test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment