aolinto/MODISA_sst_grid.R

## MODISA_sst_grid.R
# Antonio Olinto Avila-da-Silva, Instituto de Pesca, Brasil
# https://gist.github.com/aolinto/2099bbf7af343e2b9f919f5317c508b9
# script to process Aqua MODIS Sea Surface Temperature
# files downloaded from https://oceancolor.gsfc.nasa.gov/cgi/l3
# Aqua MODIS Sea Surface temperature 11 µ daytime Monthly 4 km SMI images or
# Aqua MODIS Sea Surface temperature 11 µ nigthtime Monthly 4 km SMI images
# all .L3m_MO_SST_sst_4km.nc (daytime) or .L3m_MO_NST4_sst4_4km (nigthtime)
# files must be in the working directory
# the script will open each nc file to read date information
# the script will also transform nc file to raster, read sst data
# for a multi-feature area indicated in shapefile, compute statistics
# for each features and write them into a single csv file named
# MODISA_nsst.csv
# Some reference pages
# http://geog.uoregon.edu/GeogR/topics/netCDF-read-ncdf4.html
# https://scottishsnow.wordpress.com/2014/08/24/many-rastered-beast/
# version 2019/02/27

# load libraries
# ncdf4 needs libnetcdf-dev netcdf-bin in Linux
# for rgdal
# sudo apt-get install gdal-bin proj-bin libgdal-dev libproj-dev
# install.packages(c("ncdf4","rgdal","maptools","raster"))
library(ncdf4)
library(rgdal)
library(maptools)
library(raster)

# list and remove objects from workspace
ls()
rm(list = ls())

# set working directory
setwd("/...")   # indicate the path to the nc file
dir() # check files

# verify the existence of MODISA_nsst.csv file
file.exists("MODISA_nsst.csv") # caution! new data will be appended to this file if it already exists
# if TRUE choose an option
#~ file.rename("MODISA_nsst.csv","MODISA_nsst.org")
#~ file.remove("MODISA_nsst.csv")

# create a list of nc files and indicate its length
(f <- list.files(".", pattern="*.L3m_MO_NSST_sst_4km.nc",full.names=F)) # for daytime *.L3m_MO_SST_sst_4km.nc
(lf <- length(f))

# shapefile for a multi-feature study area
shp.area <- readOGR(layer="shapename",dsn="/....") # indicate the name and location of the shapefile
shp.area <- spTransform(shp.area, CRS("+proj=longlat +datum=WGS84"))
proj4string(shp.area)
summary(shp.area)
lst.feat <- levels(shp.area[[2]]) # indicate the column with unique codes for each features

# spatial extent of the shapefile
ext.area <- extent(shp.area)

# land area
shp.land <- getData('GADM', country='BRA', level=0)
shp.land <- spTransform(shp.land, CRS("+proj=longlat +datum=WGS84"))
proj4string(shp.land)

# plot study area and land
plot(shp.area,col="lightblue",axes=T)
plot(shp.land,border="darkgreen",col="gray",add=T)

# looping for netCDF files
for (i in 1:lf) {
  # open netCDF file
  nc.data <- nc_open(f[i])

  # extract date information from nc file
  dateini <- ncatt_get(nc.data,0,"time_coverage_start")$value
  dateend <- ncatt_get(nc.data,0,"time_coverage_end")$value
  datemean <- mean(c(as.Date(dateend,"%Y-%m-%dT%H:%M:%OSZ"),as.Date(dateini,"%Y-%m-%dT%H:%M:%OSZ")))
  year <- as.numeric(substring(datemean,0,4))
  month <- as.numeric(substring(datemean,6,7))

  # close netCDF file
  nc_close(nc.data)

  # create a raster with sst data from netCDF file
  ras.data <- raster(f[i],varname="sst") # indicate the nc file
  proj4string(ras.data) <- CRS("+proj=longlat +datum=WGS84")

  # crop the raster to area extent
  crp.data <- crop(ras.data,ext.area,snap="near")
  rm(ras.data)

  # set values higher than 45 to NA
  crp.data[crp.data>=45] <- NA

  # plot study area, land and sst data
  #~ plot(shp.area,axes=T)
  #~ plot(shp.land,border="darkgreen",col="gray",add=T)
  #~ plot(crp.data,add=T)
  #~ plot(shp.area,add=T)

  # looping for features
  for (j in 1:length(lst.feat)) {

    # progress indicator
    print(sprintf("Processing file %i from %s feat %i from %i: %s %s",i,length(f),j,length(lst.feat),f[i],lst.feat[j]))

    # select crp.data area under a feature
    fea.data <- unlist((crp.data[shp.area[shp.area$cod_area==lst.feat[j],],]),use.names=FALSE)

    # get statistics
    sta.min <- min(na.omit(fea.data))
    sta.mean <- mean(na.omit(fea.data))
    sta.median <- median(na.omit(fea.data))
    sta.max <- max(na.omit(fea.data))

    # prepare final data set
    dat.output <- data.frame(year,month,lst.feat[j],sta.min,sta.mean,sta.median,sta.max)
    names(dat.output)<-c("year","month","feature","SSTmin","SSTmean","SSTmedian","SSTmax")

    # save csv file
    fe <- file.exists("MODISA_nsst.csv")
    write.table(dat.output,"MODISA_nsst.csv",row.names=FALSE,col.names=!fe,sep=",",dec=".",append=fe) # change separator and decimal strings if necessary

    # clean workspace
    rm(fea.data,sta.min,sta.mean,sta.median,sta.max,dat.output,fe)
  }

  # clean workspace
  rm(nc.data,dateini,dateend,datemean,year,month,ras.data,crp.data)
}
	# Antonio Olinto Avila-da-Silva, Instituto de Pesca, Brasil
	# https://gist.github.com/aolinto/2099bbf7af343e2b9f919f5317c508b9
	# script to process Aqua MODIS Sea Surface Temperature
	# files downloaded from https://oceancolor.gsfc.nasa.gov/cgi/l3
	# Aqua MODIS Sea Surface temperature 11 µ daytime Monthly 4 km SMI images or
	# Aqua MODIS Sea Surface temperature 11 µ nigthtime Monthly 4 km SMI images
	# all .L3m_MO_SST_sst_4km.nc (daytime) or .L3m_MO_NST4_sst4_4km (nigthtime)
	# files must be in the working directory
	# the script will open each nc file to read date information
	# the script will also transform nc file to raster, read sst data
	# for a multi-feature area indicated in shapefile, compute statistics
	# for each features and write them into a single csv file named
	# MODISA_nsst.csv
	# Some reference pages
	# http://geog.uoregon.edu/GeogR/topics/netCDF-read-ncdf4.html
	# https://scottishsnow.wordpress.com/2014/08/24/many-rastered-beast/
	# version 2019/02/27

	# load libraries
	# ncdf4 needs libnetcdf-dev netcdf-bin in Linux
	# for rgdal
	# sudo apt-get install gdal-bin proj-bin libgdal-dev libproj-dev
	# install.packages(c("ncdf4","rgdal","maptools","raster"))
	library(ncdf4)
	library(rgdal)
	library(maptools)
	library(raster)

	# list and remove objects from workspace
	ls()
	rm(list = ls())

	# set working directory
	setwd("/...") # indicate the path to the nc file
	dir() # check files

	# verify the existence of MODISA_nsst.csv file
	file.exists("MODISA_nsst.csv") # caution! new data will be appended to this file if it already exists
	# if TRUE choose an option
	#~ file.rename("MODISA_nsst.csv","MODISA_nsst.org")
	#~ file.remove("MODISA_nsst.csv")

	# create a list of nc files and indicate its length
	(f <- list.files(".", pattern=".L3m_MO_NSST_sst_4km.nc",full.names=F)) # for daytime .L3m_MO_SST_sst_4km.nc
	(lf <- length(f))

	# shapefile for a multi-feature study area
	shp.area <- readOGR(layer="shapename",dsn="/....") # indicate the name and location of the shapefile
	shp.area <- spTransform(shp.area, CRS("+proj=longlat +datum=WGS84"))
	proj4string(shp.area)
	summary(shp.area)
	lst.feat <- levels(shp.area[[2]]) # indicate the column with unique codes for each features

	# spatial extent of the shapefile
	ext.area <- extent(shp.area)

	# land area
	shp.land <- getData('GADM', country='BRA', level=0)
	shp.land <- spTransform(shp.land, CRS("+proj=longlat +datum=WGS84"))
	proj4string(shp.land)

	# plot study area and land
	plot(shp.area,col="lightblue",axes=T)
	plot(shp.land,border="darkgreen",col="gray",add=T)

	# looping for netCDF files
	for (i in 1:lf) {
	# open netCDF file
	nc.data <- nc_open(f[i])

	# extract date information from nc file
	dateini <- ncatt_get(nc.data,0,"time_coverage_start")$value
	dateend <- ncatt_get(nc.data,0,"time_coverage_end")$value
	datemean <- mean(c(as.Date(dateend,"%Y-%m-%dT%H:%M:%OSZ"),as.Date(dateini,"%Y-%m-%dT%H:%M:%OSZ")))
	year <- as.numeric(substring(datemean,0,4))
	month <- as.numeric(substring(datemean,6,7))

	# close netCDF file
	nc_close(nc.data)

	# create a raster with sst data from netCDF file
	ras.data <- raster(f[i],varname="sst") # indicate the nc file
	proj4string(ras.data) <- CRS("+proj=longlat +datum=WGS84")

	# crop the raster to area extent
	crp.data <- crop(ras.data,ext.area,snap="near")
	rm(ras.data)

	# set values higher than 45 to NA
	crp.data[crp.data>=45] <- NA

	# plot study area, land and sst data
	#~ plot(shp.area,axes=T)
	#~ plot(shp.land,border="darkgreen",col="gray",add=T)
	#~ plot(crp.data,add=T)
	#~ plot(shp.area,add=T)

	# looping for features
	for (j in 1:length(lst.feat)) {

	# progress indicator
	print(sprintf("Processing file %i from %s feat %i from %i: %s %s",i,length(f),j,length(lst.feat),f[i],lst.feat[j]))

	# select crp.data area under a feature
	fea.data <- unlist((crp.data[shp.area[shp.area$cod_area==lst.feat[j],],]),use.names=FALSE)

	# get statistics
	sta.min <- min(na.omit(fea.data))
	sta.mean <- mean(na.omit(fea.data))
	sta.median <- median(na.omit(fea.data))
	sta.max <- max(na.omit(fea.data))

	# prepare final data set
	dat.output <- data.frame(year,month,lst.feat[j],sta.min,sta.mean,sta.median,sta.max)
	names(dat.output)<-c("year","month","feature","SSTmin","SSTmean","SSTmedian","SSTmax")

	# save csv file
	fe <- file.exists("MODISA_nsst.csv")
	write.table(dat.output,"MODISA_nsst.csv",row.names=FALSE,col.names=!fe,sep=",",dec=".",append=fe) # change separator and decimal strings if necessary

	# clean workspace
	rm(fea.data,sta.min,sta.mean,sta.median,sta.max,dat.output,fe)
	}

	# clean workspace
	rm(nc.data,dateini,dateend,datemean,year,month,ras.data,crp.data)
	}