# Created March 30, 2020 17:21
# CDC Challenge Weather Data Code
import rasterstats as rs
import geopandas as gpd
import os
import pandas as pd
import glob
# Compute per-county zonal statistics for every daily GeoTIFF of one weather
# variable and write one pickled DataFrame per raster, indexed by county GEOID.

# Load the county polygons and reproject them to WGS84 longitude/latitude.
# NOTE(review): presumably this matches the CRS of the GeoTIFF rasters — confirm.
shape_frame = gpd.read_file('./tl_2017_us_county/tl_2017_us_county.shp')
shape = shape_frame.to_crs('+proj=longlat +datum=WGS84 +no_defs')
# GEOID is used as the output index; force it to str so it is a stable key.
shape['GEOID'] = shape['GEOID'].astype(str)

# Statistics requested from rasterstats; each becomes a column in the output.
output_columns = ['mean']
# Name of the variable's top-level directory (input and output live under it).
var = 'apcp'

# range() excludes its end point, so this covers the years 2000 through 2018.
for year in range(2000, 2019):
    year_dir = './' + var + '/data_NEW_NEW/' + str(year) + '/'
    if not os.path.exists(year_dir):
        print('Making', year_dir)
        # Actually create the output directory; without this the to_pickle
        # call below fails for any year whose directory does not exist yet.
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs(year_dir, exist_ok=True)

    year_files = glob.glob('./' + var + '/masked_daily_geotiff/' + str(year) + '/*.geotiff')
    print('PROCESSING', len(year_files), 'FILES')
    # glob returns files in arbitrary order; sort for a deterministic run.
    year_files = sorted(year_files)

    for tiff_path in year_files:
        # File name without directory or extension, independent of the
        # platform's path separator.
        name = os.path.splitext(os.path.basename(tiff_path))[0]

        # One dict of statistics per county polygon, in the same order as
        # the rows of `shape`. all_touched=True is passed through to
        # rasterstats' rasterization strategy.
        stats = rs.zonal_stats(shape, tiff_path, stats=output_columns, all_touched=True)

        # Index by the str-cast GEOID column of the frame that was actually
        # passed to zonal_stats, so rows and labels come from the same object.
        frame = pd.DataFrame(stats).set_index(shape['GEOID'])

        out_path = year_dir + name + '.pkl'
        frame.to_pickle(out_path)
        print(out_path)
