Skip to content

Instantly share code, notes, and snippets.

@aolinto
Last active November 21, 2022 23:16
Show Gist options
  • Save aolinto/f4aa502aed29a3ea6e8b387df207595b to your computer and use it in GitHub Desktop.
Save aolinto/f4aa502aed29a3ea6e8b387df207595b to your computer and use it in GitHub Desktop.
script to process Tropical Rainfall Measuring Mission (TRMM) data
# ==========================================================================================
# Antonio Olinto Avila-da-Silva, Instituto de Pesca, Brasil
# version 2021/04/19
# https://gist.github.com/aolinto/3a0872e0fb61b7ca3e69d35d286ae26c
# script to process Tropical Rainfall Measuring Mission (TRMM) data
# 3B43.7: Monthly 0.25 x 0.25 degree merged TRMM and other sources estimates
# files downloaded from
# https://disc.gsfc.nasa.gov/
# select: TRMM_3b43_7 / Subset-Get data /
# Download: OPeNDAP / variables: precipitation / file format: netCDF
# there was a bug in the area indication. I had to write latmin, lonmin, latmax, lonmax
# -29,-49.25,-20.75, -39 instead -49.25, -29, -39, -20.75
# all .HDF.nc4 files must be in the working directory
# the script will open each nc file to read date information
# the script will also transform nc file to raster, read precipitation data
# for a given area, compute its statistics and write them into
# a single csv file named TRMM_precip.csv
# ==========================================================================================
# load libraries
# ncdf4 needs libnetcdf-dev netcdf-bin in Linux
# rgeos needs libgeos-dev
# install.packages(c("rgeos","maptools","ncdf4","raster"))
library(maptools)
library(ncdf4)
library(raster)
library(rgdal)
# set working directory
setwd("/.../TRMM_3B43_7") # indicate the path to the files
# verify the existence of MODISA_sst.csv file
file.exists("TRMM_precip.csv") # caution! new data will be appended to this file if it already exists
# if TRUE choose an option
# file.rename("TRMM_precip.csv","TRMM_precip.org")
# file.remove("TRMM_precip.csv")
# list and remove objects from workspace
ls()
rm(list = ls())
# create a list of nc files and indicate its length
(f <- list.files(".", pattern="*.HDF.nc",full.names=F))
(lf <- length(f))
# load shapefiles
# study area
shp.area <- readOGR("/.../Shapes","precipitation_area") # indicate the name and location of the shapefile
shp.area <- spTransform(shp.area, CRS("+proj=longlat +datum=WGS84"))
proj4string(shp.area)
summary(shp.area)
# land area
shp.land2 <- getData('GADM', country='BRA', level=2)
shp.land2 <- spTransform(shp.land2, CRS("+proj=longlat +datum=WGS84"))
proj4string(shp.land2)
# plot shapefile
#~ png(filename="fig_precipitation_area.png",width=2034,height=2034,units="px",pointsize=12,bg="white",res=300)
plot(shp.land2,border="darkgreen",col="gray",axes=T,xlim=c(-47.8,-45.2),ylim=c(-25,-23.5))
plot(shp.area,col="lightblue",alpha=0.5,add=T)
#~ dev.off()
# get the spatial extent of the shapefile
ext.area <- extent(shp.area)
for (i in 1:lf) {
# progress indicator
print(sprintf("Processing file %i from %s: %s",i,length(f),f[i]))
# open netCDF file
nc.data <- nc_open(f[i])
# extract date information from nc file
dateini <- substr(strsplit(ncatt_get(nc.data,0,"FileHeader")$value,";\n")[[1]][5],22,45)
dateend <- substr(strsplit(ncatt_get(nc.data,0,"FileHeader")$value,";\n")[[1]][6],21,45)
datemean <- mean(c(as.Date(dateend,"%Y-%m-%dT%H:%M:%OSZ"),as.Date(dateini,"%Y-%m-%dT%H:%M:%OSZ")))
year <- substr(datemean,0,4)
month <- substr(datemean,6,7)
# close netCDF file
nc_close(nc.data)
# create a raster from nc file
rst.data <- raster(f[i],varname="precipitation")
proj4string(rst.data) <- CRS("+proj=longlat +datum=WGS84")
# crop the raster to area extent
crp.data <- crop(rst.data,ext.area,snap="out")
# create a dummy raster with NAs
crp.na <- setValues(crp.data,NA)
# create a raster mask with the area boundaries
rst.mask <- rasterize(shp.area,crp.na)
# apply the mask to the raster with data
msk.data <- mask(x=crp.data,mask=rst.mask)
# get statistics
sta.min <- cellStats(msk.data, stat='min',na.rm=TRUE)
sta.mean <- cellStats(msk.data, stat='mean',na.rm=TRUE)
sta.median <- median(na.omit(values(msk.data)))
sta.max <- cellStats(msk.data, stat='max',na.rm=TRUE)
# prepare final data set
dat.output <- data.frame(year,month,sta.min,sta.mean,sta.median,sta.max)
names(dat.output) <- c("year","month","PREmin","PREmean","PREmedian","PREmax")
# save csv file
fe <- file.exists("TRMM_pre.csv")
write.table(dat.output,"TRMM_pre.csv",row.names=FALSE,col.names=!fe,sep=",",dec=".",append=fe) # change separator and decimal strings if necessary
# clean workspace
rm(nc.data,dateini,dateend,datemean,year,month,rst.data,crp.data,crp.na,rst.mask,msk.data,sta.min,sta.mean,sta.median,sta.max,dat.output,fe)
}
rm(ext.area,f,i,lf,shp.area)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment