Skip to content

Instantly share code, notes, and snippets.

@thedivtagguy
Created August 18, 2022 11:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thedivtagguy/02d32f7b353f0ea6189cc34b948bbecb to your computer and use it in GitHub Desktop.
Save thedivtagguy/02d32f7b353f0ea6189cc34b948bbecb to your computer and use it in GitHub Desktop.
get data
# calculate long term averages for each month and also calculate week wise anomalies for 2022
from datetime import datetime, timedelta
import os
import numpy as np
import rasterio as rio
from multiprocessing import Pool, cpu_count
# version changed from V06B to V06C on May 8, 2022
dt_version_shift = datetime.strptime("20220508", '%Y%m%d')
def sum_rasters(dates):
total = np.zeros((1800, 3600))
transform = None
for date in dates:
dt = datetime.strptime(date, '%Y%m%d')
version = 'V06C' if dt >= dt_version_shift else 'V06B'
with rio.open("../imerg-raw-data/" + date + "-imerg/" + f"3B-DAY-L.GIS.IMERG.{date}.{version}.liquid" + ".tif") as src:
transform = src.transform
total += src.read(1)
src.close()
return (total, transform)
def lta(dates, key):
print(f"Getting: {key}")
lta, transform = sum_rasters(dates)
lta /= len(dates)
if not os.path.exists("../lta/" + key):
os.mkdir("../lta/" + key)
with rio.open("../lta/" + key + "/border-crop-lta.tif", "w", driver="GTiff", width=3600, height=1800, count=1, dtype=np.float32, transform=transform) as dst:
dst.write(lta, 1)
dst.close()
if __name__ == '__main__':
pool = Pool(processes=cpu_count())
dates = [x for x in os.listdir("../imerg-raw-data") if not x.endswith("Store") and not x.endswith("-imerg")]
dates_map = {}
dates_2022 = {}
for date in dates:
year, month, day = (date[0:4], date[4:6], date[6:8])
key = month + "-" + day
if key not in dates_map:
dates_map[key] = []
if key not in dates_2022:
dates_2022[key] = []
# don't use 2022 data for long term average
if year != "2022":
dates_map[key].append(date)
else:
dates_2022[key].append(date)
# ucomment below two lines to calculate daily long term averages
# pool_data = [(dates_map[key], key) for key in dates_map]
# pool.starmap(lta, pool_data)
# calculate weekly anomalies for March, April, May, and June 2022
# Week 10: March 6 - March 12
# Week 11: March 13 - March 19
# Week 12: March 20 - March 26
# Week 13: March 27 - April 2
# Week 14: April 3 - April 9
# Week 15: April 10 - April 16
# Week 16: April 17 - April 23
# Week 17: April 24 - April 30
# Week 18: May 1 - May 7
# Week 19: May 8 - May 14
# Week 20: May 15 - May 21
# Week 21: May 22 - May 28
# Week 22: May 29 - June 4
# Week 23: June 5 - June 11
# Week 24: June 12 - June 18
# Week 25: June 19 - June 25
# Week 26: June 26 - July 2
# Week 27: July 3 - July 9
# Week 28: July 10 - July 16
# Week 29: July 17 - July 23
# Week 30: July 24 - July 30
# Week 31: August 1 - August 7
# Week 32: August 8 - August 14
# Week 33: August 15 - August 21
# Week 34: August 22 - August 28
# Week 35: August 29 - September 4
week_dates = {}
# generate a mapping of week number to dates
for week in range(30, 31):
start_date = datetime(2022, 3, 5) + timedelta(days=7*(week-10))
end_date = datetime(2022, 3, 6) + timedelta(days=7*(week-9))
key = start_date.strftime("%m-%d")
while start_date <= end_date:
if week not in week_dates:
week_dates[week] = []
week_dates[week].append(start_date.strftime("%Y%m%d"))
start_date += timedelta(days=1)
# now calculate lta for each week
# go through the dates of each week, remove the year part from the date
# search those dates in the main dates list above
# sum all those dates
# average by the number of weeks
# write to file
for week_num, week_dates in week_dates.items():
print(f"Calculating week: {week_num}")
week_lta = np.zeros((1800, 3600))
week_total_2022 = np.zeros((1800, 3600))
# remove year + turn dates into keys to look up in the main dates map
dates_no_year = [x[4:6] + "-" + x[6:] for x in week_dates]
start_date = dates_no_year[0]
end_date = dates_no_year[-1]
# sum all the data for each of the above dates
total_days_count = 0
total_2022_days_count = 0
for key in dates_no_year:
if key in dates_map:
print(dates_map[key])
(total_lt, transform_lt) = sum_rasters(dates_map[key])
week_lta += total_lt
total_days_count += len(dates_map[key])
if key in dates_2022:
(total_2022, transform_2022) = sum_rasters(dates_2022[key])
week_total_2022 += total_2022
# print(total_days_count, total_days_count / 7)
total_weeks = total_days_count / 7
# average the data
week_lta /= total_weeks
# average for 2022
# anomaly
# daily imerg data has a 0.1mm unit. so if actual rainfall was 10mm, it is 100 in the data.
week_2022_anomaly = (week_total_2022 - week_lta) * 0.1
# make directtory
# if "../lta/week-" + str(week_num) does not exist, make it
if not os.path.exists("../lta/week-" + str(week_num)):
os.mkdir("../lta/week-" + str(week_num))
# if not os.path.exists("../2022-anomalies/week-" + str(week_num)):
# os.mkdir("../2022-anomalies/week-" + str(week_num))
# write the data
with rio.open("../lta/week-" + str(week_num) + f"/border-crop-lta-{start_date}-{end_date}.tif", "w", driver="GTiff", width=3600, height=1800, count=1, dtype=np.float32, transform=transform_lt) as dst:
dst.update_tags(IMPORTANT_UNIT = "0.1mm. So if actual rainfall was 10mm, it is 100 in the data.")
dst.write(week_lta, 1)
dst.close()
with rio.open(f"../2022-anomalies/border-crop-anomaly-{start_date}-{end_date}.tif", "w", driver="GTiff", width=3600, height=1800, count=1, dtype=np.float32, transform=transform_2022) as dst:
dst.update_tags(IMPORTANT_UNIT = "1mm. So if actual rainfall was 10mm, it is 10 in the data.")
dst.write(week_2022_anomaly, 1)
dst.close()
import subprocess
import os
import datetime
from multiprocessing import Pool, cpu_count
# version changed from V06B to V06C on May 8, 2022
dt_version_shift = datetime.datetime.strptime("20220508", '%Y%m%d')
start_month = 7
end_month = 8
start_year = 2000
end_year = 2022
def getYear(year):
for month in range(start_month, end_month, 1):
for day in range(31, 0, -1):
date = str(year) + str(month).zfill(2) + str(day).zfill(2)
# doesn't exist for some reason. can check here: https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2019/06/
if date == "20190630":
continue
try:
dt = datetime.datetime.strptime(date, '%Y%m%d')
# if more than day before yesterday's date, skip
if dt > datetime.datetime.now() - datetime.timedelta(days=2):
continue
except ValueError:
continue
# if folder exists continue
if os.path.exists('../imerg-raw-data/' + date):
continue
# Else, make it
else:
os.makedirs('../imerg-raw-data/' + date)
print(date)
# version changed from V06B to V06C on May 8, 2022
version = 'V06C' if dt >= dt_version_shift else 'V06B'
file_name = f"3B-DAY-L.GIS.IMERG.{date}.{version}"
file_link = f"https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/{year}/{str(month).zfill(2)}/{file_name}.zip"
# download the imerg zip
command = f"curl -o ../imerg-raw-data/{date}.zip -O --ssl-reqd -u manas.sharma@thomsonreuters.com:manas.sharma@thomsonreuters.com {file_link}"
subprocess.call(command, shell=True)
# unzip and delete zip
command = f"unzip ../imerg-raw-data/{date}.zip -d ../imerg-raw-data/{date}-imerg && rm ../imerg-raw-data/{date}.zip"
subprocess.call(command, shell=True)
# sample code for clipping to a geojson
# # make directory
# command = f"mkdir ../imerg-clipped/{date}"
# subprocess.call(command, shell=True)
# command = f"/usr/local/bin/gdalwarp -cutline ../maps/border-crop.geojson ../imerg-raw-data/{date}- imerg/{file_name}.liquid.tif ../imerg-clipped/{date}/border-crop-{date}.tif"
# subprocess.call(command, shell=True)
# uncomment to delete raw data
# command = f"rm -rf ../imerg-raw-data/{date}-imerg"
# print(command)
# subprocess.call(command, shell=True)
if __name__ == '__main__':
pool = Pool(processes=cpu_count())
pool.map(getYear, range(end_year, start_year, -1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment