Created
August 18, 2022 11:00
-
-
Save thedivtagguy/02d32f7b353f0ea6189cc34b948bbecb to your computer and use it in GitHub Desktop.
get data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# calculate long term averages for each month and also calculate week wise anomalies for 2022 | |
from datetime import datetime, timedelta | |
import os | |
import numpy as np | |
import rasterio as rio | |
from multiprocessing import Pool, cpu_count | |
# version changed from V06B to V06C on May 8, 2022 | |
dt_version_shift = datetime.strptime("20220508", '%Y%m%d') | |
def sum_rasters(dates): | |
total = np.zeros((1800, 3600)) | |
transform = None | |
for date in dates: | |
dt = datetime.strptime(date, '%Y%m%d') | |
version = 'V06C' if dt >= dt_version_shift else 'V06B' | |
with rio.open("../imerg-raw-data/" + date + "-imerg/" + f"3B-DAY-L.GIS.IMERG.{date}.{version}.liquid" + ".tif") as src: | |
transform = src.transform | |
total += src.read(1) | |
src.close() | |
return (total, transform) | |
def lta(dates, key): | |
print(f"Getting: {key}") | |
lta, transform = sum_rasters(dates) | |
lta /= len(dates) | |
if not os.path.exists("../lta/" + key): | |
os.mkdir("../lta/" + key) | |
with rio.open("../lta/" + key + "/border-crop-lta.tif", "w", driver="GTiff", width=3600, height=1800, count=1, dtype=np.float32, transform=transform) as dst: | |
dst.write(lta, 1) | |
dst.close() | |
if __name__ == '__main__': | |
pool = Pool(processes=cpu_count()) | |
dates = [x for x in os.listdir("../imerg-raw-data") if not x.endswith("Store") and not x.endswith("-imerg")] | |
dates_map = {} | |
dates_2022 = {} | |
for date in dates: | |
year, month, day = (date[0:4], date[4:6], date[6:8]) | |
key = month + "-" + day | |
if key not in dates_map: | |
dates_map[key] = [] | |
if key not in dates_2022: | |
dates_2022[key] = [] | |
# don't use 2022 data for long term average | |
if year != "2022": | |
dates_map[key].append(date) | |
else: | |
dates_2022[key].append(date) | |
# ucomment below two lines to calculate daily long term averages | |
# pool_data = [(dates_map[key], key) for key in dates_map] | |
# pool.starmap(lta, pool_data) | |
# calculate weekly anomalies for March, April, May, and June 2022 | |
# Week 10: March 6 - March 12 | |
# Week 11: March 13 - March 19 | |
# Week 12: March 20 - March 26 | |
# Week 13: March 27 - April 2 | |
# Week 14: April 3 - April 9 | |
# Week 15: April 10 - April 16 | |
# Week 16: April 17 - April 23 | |
# Week 17: April 24 - April 30 | |
# Week 18: May 1 - May 7 | |
# Week 19: May 8 - May 14 | |
# Week 20: May 15 - May 21 | |
# Week 21: May 22 - May 28 | |
# Week 22: May 29 - June 4 | |
# Week 23: June 5 - June 11 | |
# Week 24: June 12 - June 18 | |
# Week 25: June 19 - June 25 | |
# Week 26: June 26 - July 2 | |
# Week 27: July 3 - July 9 | |
# Week 28: July 10 - July 16 | |
# Week 29: July 17 - July 23 | |
# Week 30: July 24 - July 30 | |
# Week 31: August 1 - August 7 | |
# Week 32: August 8 - August 14 | |
# Week 33: August 15 - August 21 | |
# Week 34: August 22 - August 28 | |
# Week 35: August 29 - September 4 | |
week_dates = {} | |
# generate a mapping of week number to dates | |
for week in range(30, 31): | |
start_date = datetime(2022, 3, 5) + timedelta(days=7*(week-10)) | |
end_date = datetime(2022, 3, 6) + timedelta(days=7*(week-9)) | |
key = start_date.strftime("%m-%d") | |
while start_date <= end_date: | |
if week not in week_dates: | |
week_dates[week] = [] | |
week_dates[week].append(start_date.strftime("%Y%m%d")) | |
start_date += timedelta(days=1) | |
# now calculate lta for each week | |
# go through the dates of each week, remove the year part from the date | |
# search those dates in the main dates list above | |
# sum all those dates | |
# average by the number of weeks | |
# write to file | |
for week_num, week_dates in week_dates.items(): | |
print(f"Calculating week: {week_num}") | |
week_lta = np.zeros((1800, 3600)) | |
week_total_2022 = np.zeros((1800, 3600)) | |
# remove year + turn dates into keys to look up in the main dates map | |
dates_no_year = [x[4:6] + "-" + x[6:] for x in week_dates] | |
start_date = dates_no_year[0] | |
end_date = dates_no_year[-1] | |
# sum all the data for each of the above dates | |
total_days_count = 0 | |
total_2022_days_count = 0 | |
for key in dates_no_year: | |
if key in dates_map: | |
print(dates_map[key]) | |
(total_lt, transform_lt) = sum_rasters(dates_map[key]) | |
week_lta += total_lt | |
total_days_count += len(dates_map[key]) | |
if key in dates_2022: | |
(total_2022, transform_2022) = sum_rasters(dates_2022[key]) | |
week_total_2022 += total_2022 | |
# print(total_days_count, total_days_count / 7) | |
total_weeks = total_days_count / 7 | |
# average the data | |
week_lta /= total_weeks | |
# average for 2022 | |
# anomaly | |
# daily imerg data has a 0.1mm unit. so if actual rainfall was 10mm, it is 100 in the data. | |
week_2022_anomaly = (week_total_2022 - week_lta) * 0.1 | |
# make directtory | |
# if "../lta/week-" + str(week_num) does not exist, make it | |
if not os.path.exists("../lta/week-" + str(week_num)): | |
os.mkdir("../lta/week-" + str(week_num)) | |
# if not os.path.exists("../2022-anomalies/week-" + str(week_num)): | |
# os.mkdir("../2022-anomalies/week-" + str(week_num)) | |
# write the data | |
with rio.open("../lta/week-" + str(week_num) + f"/border-crop-lta-{start_date}-{end_date}.tif", "w", driver="GTiff", width=3600, height=1800, count=1, dtype=np.float32, transform=transform_lt) as dst: | |
dst.update_tags(IMPORTANT_UNIT = "0.1mm. So if actual rainfall was 10mm, it is 100 in the data.") | |
dst.write(week_lta, 1) | |
dst.close() | |
with rio.open(f"../2022-anomalies/border-crop-anomaly-{start_date}-{end_date}.tif", "w", driver="GTiff", width=3600, height=1800, count=1, dtype=np.float32, transform=transform_2022) as dst: | |
dst.update_tags(IMPORTANT_UNIT = "1mm. So if actual rainfall was 10mm, it is 10 in the data.") | |
dst.write(week_2022_anomaly, 1) | |
dst.close() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import os | |
import datetime | |
from multiprocessing import Pool, cpu_count | |
# version changed from V06B to V06C on May 8, 2022 | |
dt_version_shift = datetime.datetime.strptime("20220508", '%Y%m%d') | |
start_month = 7 | |
end_month = 8 | |
start_year = 2000 | |
end_year = 2022 | |
def getYear(year): | |
for month in range(start_month, end_month, 1): | |
for day in range(31, 0, -1): | |
date = str(year) + str(month).zfill(2) + str(day).zfill(2) | |
# doesn't exist for some reason. can check here: https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2019/06/ | |
if date == "20190630": | |
continue | |
try: | |
dt = datetime.datetime.strptime(date, '%Y%m%d') | |
# if more than day before yesterday's date, skip | |
if dt > datetime.datetime.now() - datetime.timedelta(days=2): | |
continue | |
except ValueError: | |
continue | |
# if folder exists continue | |
if os.path.exists('../imerg-raw-data/' + date): | |
continue | |
# Else, make it | |
else: | |
os.makedirs('../imerg-raw-data/' + date) | |
print(date) | |
# version changed from V06B to V06C on May 8, 2022 | |
version = 'V06C' if dt >= dt_version_shift else 'V06B' | |
file_name = f"3B-DAY-L.GIS.IMERG.{date}.{version}" | |
file_link = f"https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/{year}/{str(month).zfill(2)}/{file_name}.zip" | |
# download the imerg zip | |
command = f"curl -o ../imerg-raw-data/{date}.zip -O --ssl-reqd -u manas.sharma@thomsonreuters.com:manas.sharma@thomsonreuters.com {file_link}" | |
subprocess.call(command, shell=True) | |
# unzip and delete zip | |
command = f"unzip ../imerg-raw-data/{date}.zip -d ../imerg-raw-data/{date}-imerg && rm ../imerg-raw-data/{date}.zip" | |
subprocess.call(command, shell=True) | |
# sample code for clipping to a geojson | |
# # make directory | |
# command = f"mkdir ../imerg-clipped/{date}" | |
# subprocess.call(command, shell=True) | |
# command = f"/usr/local/bin/gdalwarp -cutline ../maps/border-crop.geojson ../imerg-raw-data/{date}- imerg/{file_name}.liquid.tif ../imerg-clipped/{date}/border-crop-{date}.tif" | |
# subprocess.call(command, shell=True) | |
# uncomment to delete raw data | |
# command = f"rm -rf ../imerg-raw-data/{date}-imerg" | |
# print(command) | |
# subprocess.call(command, shell=True) | |
if __name__ == '__main__': | |
pool = Pool(processes=cpu_count()) | |
pool.map(getYear, range(end_year, start_year, -1)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment