Grabs near-real-time (NRT) precipitation data from the FTP service of vrsg-servir.adpc.net and creates a daily time series for a given polygon
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
vrg2timeseries.py, SERVIR-Mekong (2017-07-26)
Downloads data from SERVIR-Mekong's Virtual Rain and Stream Gauge (VRSG)
(vrsg-servir.adpc.net) and calculates a time series of the daily data over
an area
______________________________________________________________________
Usage
------
$ python vrg2timeseries.py {options}
{options} include:
--idate (-i) : required
: start date of the time series
: in format YYYY-MM-DD
--edate (-e) : required
: end date of the time series
: in format YYYY-MM-DD
--product (-p) : required
: precipitation product to create the time series for
: options are IMERG or GSMAP
--directory (-d) : path to directory to use as current working directory
: script will create a data folder to store the downloaded data
--shapefile (-s) : shapefile to extract the precipitation time series for
: if the shapefile argument is not given, the mean value for the entire image is used
Example Usage
-------------
1) create a time series from IMERG:
$ python vrg2timeseries.py -p IMERG -i 2017-01-01 -e 2017-01-31
2) create a time series from GSMAP
$ python vrg2timeseries.py -p GSMAP -i 2017-01-01 -e 2017-01-31
3) create a time series from IMERG over a specific region
$ python vrg2timeseries.py -p IMERG -i 2017-01-01 -e 2017-01-31 -s /test.shp
4) create a time series from IMERG in a specific directory
$ python vrg2timeseries.py -p IMERG -i 2017-01-01 -e 2017-01-31 -d /Users/user/Desktop/
"""
from __future__ import division,print_function
import os
import sys
import argparse
import ftplib
import datetime
import netCDF4
import numpy as np
import pandas as pd
from time import sleep
from osgeo import gdal,ogr
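# Dependency note: besides the standard library, this script needs the
# netCDF4, numpy, pandas, and GDAL (osgeo) Python packages. One possible way
# to install them (assuming a conda environment) is:
#
#   conda install -c conda-forge netcdf4 numpy pandas gdal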
# Print iterations progress
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'):
"""
Call in a loop to create terminal progress bar
@params:
iteration - Required : current iteration (Int)
total - Required : total iterations (Int)
prefix - Optional : prefix string (Str)
suffix - Optional : suffix string (Str)
decimals - Optional : positive number of decimals in percent complete (Int)
length - Optional : character length of bar (Int)
fill - Optional : bar fill character (Str)
"""
percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
filledLength = int(length * iteration // total)
bar = fill * filledLength + '-' * (length - filledLength)
sys.stdout.write('\r%s |%s| %s%% %s\r' % (prefix, bar, percent, suffix))
sys.stdout.flush() # force the bar to render immediately
# Print New Line on Complete
if iteration == total:
print()
return
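# Example call pattern for printProgressBar (sketch; the values are arbitrary):
# update the bar once per loop iteration and it overwrites itself in place.
#
#   n = 25
#   for k in range(n):
#       sleep(0.05)  # stand-in for real work
#       printProgressBar(k + 1, n, prefix='Progress:', suffix='Complete', length=40)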
class vrg2ts(object):
# initialize object
def __init__(self,directory,product,shapefile,iniTime,endTime):
self.directory = directory
self.product = product
self.shapefile = shapefile
self.iniTime = iniTime
self.endTime = endTime
stime = iniTime.split('-')
etime = endTime.split('-')
self.syr = stime[0]
self.smon = stime[1]
self.sday = stime[2]
self.eyr = etime[0]
self.emon = etime[1]
self.eday = etime[2]
def getPrecip(self):
ftpUrl = '58.137.55.93' # ftp url
# dates to process
self.t1 = datetime.date(int(self.syr),int(self.smon),int(self.sday))
self.t2 = datetime.date(int(self.eyr),int(self.emon),int(self.eday))
# find date offset
self.tOffSet = self.t2 - self.t1
# get number of iterations based on date offset
n_iter = self.tOffSet.days + 1
datadir = self.directory+'data/'+self.product+'/'
# handle output directory paths
if not os.path.exists(self.directory+'data/'):
os.mkdir(self.directory+'data/')
if not os.path.exists(datadir):
os.mkdir(datadir)
# log into ftp
ftp = ftplib.FTP(ftpUrl)
ftp.login('downloader','Down0000')
for i in range(n_iter):
# get date to download data
now = self.t1 + datetime.timedelta(i)
# set variables for GSMAP product download
if self.product == 'GSMAP':
ftpDir = '/VRG/GSMAP/RT/DAILY/KH/{0}/{1:02d}/'.format(now.year,now.month)
ftpFile = 'KH_gsmap_gauge.{0}{1:02d}{2:02d}.0.1d.daily.00Z-23Z.nc'.format(now.year,now.month,now.day)
ftpPath = ftpDir + ftpFile
# set variables for IMERG product download
else:
ftpDir = '/VRG/IMERG/DAILY/{0}/{1:02d}/KH/'.format(now.year,now.month)
ftpFile = 'KH_3B-DAY-E.MS.MRG.3IMERG.{0}{1:02d}{2:02d}-S000000-E235959.V04.nc'.format(now.year,now.month,now.day)
ftpPath = ftpDir + ftpFile
# skip the download if the file already exists locally
if not os.path.exists(datadir+ftpFile):
with open(datadir+ftpFile, 'wb') as outfile:
ftp.retrbinary("RETR " + ftpPath, outfile.write)
# show download status bar
sleep(0.1)
printProgressBar(i + 1, n_iter, prefix='Download Progress:', suffix='Complete', length=40)
ftp.quit() # disconnect from ftp
return
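# Robustness sketch (not wired into getPrecip above): ftplib raises
# ftplib.error_perm when a RETR target is missing, so days with no file on the
# server could be skipped by listing the remote directory first. `ftp`,
# `ftpDir`, `ftpFile`, `ftpPath`, and `datadir` are as defined in getPrecip().
#
#   try:
#       available = ftp.nlst(ftpDir)
#   except ftplib.error_perm:
#       available = []
#   if any(name.endswith(ftpFile) for name in available):
#       with open(datadir + ftpFile, 'wb') as outfile:
#           ftp.retrbinary('RETR ' + ftpPath, outfile.write)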
def makeTimeSeries(self):
# set directory that data was saved to
datadir = self.directory+'data/'+self.product+'/'
# get number of iterations based on date offset
n_iter = self.tOffSet.days + 1
# set blank array to populated with calculated time series
ts = np.zeros([n_iter])
# loop over iterations
for i in range(n_iter):
# get date to process
now = self.t1 + datetime.timedelta(i)
# set variables for GSMAP product
if self.product == 'GSMAP':
filename = datadir+'KH_gsmap_gauge.{0}{1:02d}{2:02d}.0.1d.daily.00Z-23Z.nc'.format(now.year,now.month,now.day)
varName = 'precip'
#set variables for IMERG product
else:
filename = datadir+'KH_3B-DAY-E.MS.MRG.3IMERG.{0}{1:02d}{2:02d}-S000000-E235959.V04.nc'.format(now.year,now.month,now.day)
varName = 'precipitationCal'
# read in netCDF precipitation data
nc = netCDF4.Dataset(filename,'r')
pr_val = nc.variables[varName]
precip = pr_val[0,:,:]
# preprocessing of precip data
if self.product == 'IMERG':
precip = np.swapaxes(precip,0,1) # swap axes to lat then lon
precip = np.flipud(precip) # flip along y-axis
precip = np.ma.masked_where(precip<0,precip) # mask data less than 0
# check if a shapefile argument was specified
if self.shapefile is not None:
# if so make raster mask from shapefile during first pass
if i == 0:
lons = nc.variables['lon']
lats = nc.variables['lat']
mask = self.makeMask(lons[:],lats[:],0.1)
# mask the precip data
precip = np.ma.masked_where(mask==0,precip)
# get mean for area
ts[i] = np.mean(precip)
nc.close() # close netCDF file
# show processing status bar
sleep(0.1)
printProgressBar(i + 1, n_iter, prefix='Time Series Progress:', suffix='Complete', length=40)
# set output file path for csv
outfile = self.directory + 'vrg_{0}_timeseries_{1}_{2}.csv'.format(self.product,self.t1.strftime("%Y%m%d"),self.t2.strftime("%Y%m%d"))
# get date range for time series
dates = pd.date_range(self.t1, self.t2, freq='D')
p_series = pd.Series(ts, index=dates)
# populate data to pandas data frame and set column names
df = pd.DataFrame(p_series)
df.index.name = 'Date'
df.columns = ['Precipitation']
df.to_csv(outfile) # save file
return
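# Reading the output back (sketch): the CSV written above has a 'Date' index
# column and a 'Precipitation' column, so it can be reloaded with pandas and
# resampled. The file name below is a placeholder matching the pattern of
# `outfile` above.
#
#   df = pd.read_csv('vrg_IMERG_timeseries_20170101_20170131.csv',
#                    index_col='Date', parse_dates=True)
#   monthly = df['Precipitation'].resample('M').sum()  # monthly totals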
def makeMask(self,lon,lat,res):
# read in shapefile
source_ds = ogr.Open(self.shapefile)
source_layer = source_ds.GetLayer()
# Create an in-memory raster on the same grid as the precipitation data
mem_ds = gdal.GetDriverByName('MEM').Create('', lon.size, lat.size, 1, gdal.GDT_Byte)
mem_ds.SetGeoTransform((lon.min(), res, 0, lat.max(), 0, -res))
band = mem_ds.GetRasterBand(1)
# Rasterize shapefile to grid
gdal.RasterizeLayer(mem_ds, [1], source_layer, burn_values=[1])
# Get rasterized shapefile as numpy array
array = band.ReadAsArray()
# Flush memory file
mem_ds = None
band = None
return array
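# Geotransform note (sketch): with the transform (lon.min(), res, 0,
# lat.max(), 0, -res) used above, the rasterized mask has its first row at the
# northern edge of the grid, and a point (x, y) in degrees maps approximately
# to a pixel as:
#
#   col = int((x - lon.min()) / res)
#   row = int((lat.max() - y) / res)
#
# This assumes the lon/lat vectors from the netCDF file are evenly spaced at
# `res` degrees, which is what the makeMask(lons, lats, 0.1) call in
# makeTimeSeries() assumes as well.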
def main():
# set argument parsing object
parser = argparse.ArgumentParser(description="Download netCDF data from SERVIR Mekong's Virtual Rain \
and Stream Gauge tool and convert it to a time series")
# specify arguments for script and help informations
parser.add_argument('--idate','-i', type=str,required=True,
help="Start date to download data and process time series in format 'YYYY-MM-DD'")
parser.add_argument('--edate','-e', type=str,required=True,
help="End date to download data and process time series in format 'YYYY-MM-DD'")
parser.add_argument('--product','-p',choices=['IMERG','GSMAP'],required=True,
help='VRSG product to create time series from')
parser.add_argument('--directory','-d',type=str,
help='Folder directory to store the downloaded netCDF data')
parser.add_argument('--shapefile','-s',type=str,
help='Shapefile to extract the time series for')
args = parser.parse_args() # get arguments
# check if directory argument was passed
if args.directory is None:
# if not, give information
print('No directory was given for output data...\nUsing current working directory: {}\n'.format(os.getcwd()))
# if not, set to current working directory
args.directory = os.getcwd()+'/'
# check if shapefile argument was passed
if args.shapefile is None:
# if not, give information
print('No shapefile was given for spatial averaging...\nUsing entire raster area\n')
# check if product argument is what was expected
if args.product not in ['IMERG','GSMAP']:
print('Product type not recognized: {0}\nPlease select either IMERG or GSMAP'.format(args.product))
sys.exit()
# initialize processing object
vrgts = vrg2ts(args.directory,args.product,args.shapefile,args.idate,args.edate)
vrgts.getPrecip() # download precip data
vrgts.makeTimeSeries() # process it to time series
return
if __name__ == "__main__":
main() # do the process
print('Processing complete, exiting...') # print when finished
sys.exit() # exit python