aolinto/TRMM_precipitation.R

## TRMM_precipitation.R
# ==========================================================================================
# Antonio Olinto Avila-da-Silva, Instituto de Pesca, Brasil
# version 2021/04/19
# https://gist.github.com/aolinto/3a0872e0fb61b7ca3e69d35d286ae26c
# script to process Tropical Rainfall Measuring Mission (TRMM) data
# 3B43.7: Monthly 0.25 x 0.25 degree merged TRMM and other sources estimates
# files downloaded from
# https://disc.gsfc.nasa.gov/
# select: TRMM_3b43_7 / Subset-Get data /
# Download: OPeNDAP / variables: precipitation / file format: netCDF
# there was a bug in the area indication. I had to write latmin, lonmin, latmax, lonmax
# -29,-49.25,-20.75, -39 instead -49.25, -29, -39, -20.75
# all .HDF.nc4 files must be in the working directory
# the script will open each nc file to read date information
# the script will also transform nc file to raster, read precipitation data
# for a given area, compute its statistics and write them into
# a single csv file named TRMM_precip.csv
# ==========================================================================================

# load libraries
# ncdf4 needs libnetcdf-dev netcdf-bin in Linux
# rgeos needs libgeos-dev
# install.packages(c("rgeos","maptools","ncdf4","raster"))
library(maptools)
library(ncdf4)
library(raster)
library(rgdal)

# set working directory
setwd("/.../TRMM_3B43_7")   # indicate the path to the files

# verify the existence of MODISA_sst.csv file
file.exists("TRMM_precip.csv") # caution! new data will be appended to this file if it already exists
# if TRUE choose an option
# file.rename("TRMM_precip.csv","TRMM_precip.org")
# file.remove("TRMM_precip.csv")

# list and remove objects from workspace
ls()
rm(list = ls())

# create a list of nc files and indicate its length
(f <- list.files(".", pattern="*.HDF.nc",full.names=F))
(lf <- length(f))

# load shapefiles
# study area
shp.area <- readOGR("/.../Shapes","precipitation_area") # indicate the name and location of the shapefile
shp.area <- spTransform(shp.area, CRS("+proj=longlat +datum=WGS84"))
proj4string(shp.area)
summary(shp.area)

# land area
shp.land2 <- getData('GADM', country='BRA', level=2)
shp.land2 <- spTransform(shp.land2, CRS("+proj=longlat +datum=WGS84"))
proj4string(shp.land2)

# plot shapefile

#~ png(filename="fig_precipitation_area.png",width=2034,height=2034,units="px",pointsize=12,bg="white",res=300)
plot(shp.land2,border="darkgreen",col="gray",axes=T,xlim=c(-47.8,-45.2),ylim=c(-25,-23.5))
plot(shp.area,col="lightblue",alpha=0.5,add=T)
#~ dev.off()

# get the spatial extent of the shapefile
ext.area <- extent(shp.area)

for (i in 1:lf) {

  # progress indicator
  print(sprintf("Processing file %i from %s: %s",i,length(f),f[i]))

  # open netCDF file
  nc.data <- nc_open(f[i])

  # extract date information from nc file
  dateini <- substr(strsplit(ncatt_get(nc.data,0,"FileHeader")$value,";\n")[[1]][5],22,45)
  dateend <- substr(strsplit(ncatt_get(nc.data,0,"FileHeader")$value,";\n")[[1]][6],21,45)
  datemean <- mean(c(as.Date(dateend,"%Y-%m-%dT%H:%M:%OSZ"),as.Date(dateini,"%Y-%m-%dT%H:%M:%OSZ")))
  year <- substr(datemean,0,4)
  month <- substr(datemean,6,7)

  # close netCDF file
  nc_close(nc.data)

  # create a raster from nc file
  rst.data <- raster(f[i],varname="precipitation")
  proj4string(rst.data) <- CRS("+proj=longlat +datum=WGS84")

  # crop the raster to area extent
  crp.data <- crop(rst.data,ext.area,snap="out")

  # create a dummy raster with NAs
  crp.na <- setValues(crp.data,NA)

  # create a raster mask with the area boundaries
  rst.mask <- rasterize(shp.area,crp.na)

  # apply the mask to the raster with data
  msk.data <- mask(x=crp.data,mask=rst.mask)

  # get statistics
  sta.min <- cellStats(msk.data, stat='min',na.rm=TRUE)
  sta.mean <- cellStats(msk.data, stat='mean',na.rm=TRUE)
  sta.median <- median(na.omit(values(msk.data)))
  sta.max <- cellStats(msk.data, stat='max',na.rm=TRUE)

  # prepare final data set
  dat.output <- data.frame(year,month,sta.min,sta.mean,sta.median,sta.max)
  names(dat.output) <- c("year","month","PREmin","PREmean","PREmedian","PREmax")

  # save csv file
  fe <- file.exists("TRMM_pre.csv")
  write.table(dat.output,"TRMM_pre.csv",row.names=FALSE,col.names=!fe,sep=",",dec=".",append=fe) # change separator and decimal strings if necessary

  # clean workspace
  rm(nc.data,dateini,dateend,datemean,year,month,rst.data,crp.data,crp.na,rst.mask,msk.data,sta.min,sta.mean,sta.median,sta.max,dat.output,fe)
}

rm(ext.area,f,i,lf,shp.area)
	# ==========================================================================================
	# Antonio Olinto Avila-da-Silva, Instituto de Pesca, Brasil
	# version 2021/04/19
	# https://gist.github.com/aolinto/3a0872e0fb61b7ca3e69d35d286ae26c
	# script to process Tropical Rainfall Measuring Mission (TRMM) data
	# 3B43.7: Monthly 0.25 x 0.25 degree merged TRMM and other sources estimates
	# files downloaded from
	# https://disc.gsfc.nasa.gov/
	# select: TRMM_3b43_7 / Subset-Get data /
	# Download: OPeNDAP / variables: precipitation / file format: netCDF
	# there was a bug in the area indication. I had to write latmin, lonmin, latmax, lonmax
	# -29,-49.25,-20.75, -39 instead -49.25, -29, -39, -20.75
	# all .HDF.nc4 files must be in the working directory
	# the script will open each nc file to read date information
	# the script will also transform nc file to raster, read precipitation data
	# for a given area, compute its statistics and write them into
	# a single csv file named TRMM_precip.csv
	# ==========================================================================================

	# load libraries
	# ncdf4 needs libnetcdf-dev netcdf-bin in Linux
	# rgeos needs libgeos-dev
	# install.packages(c("rgeos","maptools","ncdf4","raster"))
	library(maptools)
	library(ncdf4)
	library(raster)
	library(rgdal)

	# set working directory
	setwd("/.../TRMM_3B43_7") # indicate the path to the files

	# verify the existence of MODISA_sst.csv file
	file.exists("TRMM_precip.csv") # caution! new data will be appended to this file if it already exists
	# if TRUE choose an option
	# file.rename("TRMM_precip.csv","TRMM_precip.org")
	# file.remove("TRMM_precip.csv")

	# list and remove objects from workspace
	ls()
	rm(list = ls())

	# create a list of nc files and indicate its length
	(f <- list.files(".", pattern="*.HDF.nc",full.names=F))
	(lf <- length(f))

	# load shapefiles
	# study area
	shp.area <- readOGR("/.../Shapes","precipitation_area") # indicate the name and location of the shapefile
	shp.area <- spTransform(shp.area, CRS("+proj=longlat +datum=WGS84"))
	proj4string(shp.area)
	summary(shp.area)

	# land area
	shp.land2 <- getData('GADM', country='BRA', level=2)
	shp.land2 <- spTransform(shp.land2, CRS("+proj=longlat +datum=WGS84"))
	proj4string(shp.land2)

	# plot shapefile

	#~ png(filename="fig_precipitation_area.png",width=2034,height=2034,units="px",pointsize=12,bg="white",res=300)
	plot(shp.land2,border="darkgreen",col="gray",axes=T,xlim=c(-47.8,-45.2),ylim=c(-25,-23.5))
	plot(shp.area,col="lightblue",alpha=0.5,add=T)
	#~ dev.off()

	# get the spatial extent of the shapefile
	ext.area <- extent(shp.area)

	for (i in 1:lf) {

	# progress indicator
	print(sprintf("Processing file %i from %s: %s",i,length(f),f[i]))

	# open netCDF file
	nc.data <- nc_open(f[i])

	# extract date information from nc file
	dateini <- substr(strsplit(ncatt_get(nc.data,0,"FileHeader")$value,";\n")[[1]][5],22,45)
	dateend <- substr(strsplit(ncatt_get(nc.data,0,"FileHeader")$value,";\n")[[1]][6],21,45)
	datemean <- mean(c(as.Date(dateend,"%Y-%m-%dT%H:%M:%OSZ"),as.Date(dateini,"%Y-%m-%dT%H:%M:%OSZ")))
	year <- substr(datemean,0,4)
	month <- substr(datemean,6,7)

	# close netCDF file
	nc_close(nc.data)

	# create a raster from nc file
	rst.data <- raster(f[i],varname="precipitation")
	proj4string(rst.data) <- CRS("+proj=longlat +datum=WGS84")

	# crop the raster to area extent
	crp.data <- crop(rst.data,ext.area,snap="out")

	# create a dummy raster with NAs
	crp.na <- setValues(crp.data,NA)

	# create a raster mask with the area boundaries
	rst.mask <- rasterize(shp.area,crp.na)

	# apply the mask to the raster with data
	msk.data <- mask(x=crp.data,mask=rst.mask)

	# get statistics
	sta.min <- cellStats(msk.data, stat='min',na.rm=TRUE)
	sta.mean <- cellStats(msk.data, stat='mean',na.rm=TRUE)
	sta.median <- median(na.omit(values(msk.data)))
	sta.max <- cellStats(msk.data, stat='max',na.rm=TRUE)

	# prepare final data set
	dat.output <- data.frame(year,month,sta.min,sta.mean,sta.median,sta.max)
	names(dat.output) <- c("year","month","PREmin","PREmean","PREmedian","PREmax")

	# save csv file
	fe <- file.exists("TRMM_pre.csv")
	write.table(dat.output,"TRMM_pre.csv",row.names=FALSE,col.names=!fe,sep=",",dec=".",append=fe) # change separator and decimal strings if necessary

	# clean workspace
	rm(nc.data,dateini,dateend,datemean,year,month,rst.data,crp.data,crp.na,rst.mask,msk.data,sta.min,sta.mean,sta.median,sta.max,dat.output,fe)
	}

	rm(ext.area,f,i,lf,shp.area)