-
-
Save aolinto/79e184f6c156c6ab21b3 to your computer and use it in GitHub Desktop.
# Antonio Olinto Avila-da-Silva, Instituto de Pesca, Brasil | |
# ver 2016-05-17 | |
# https://gist.github.com/aolinto/79e184f6c156c6ab21b3 | |
# script to process Aqua MODIS Sea Surface Temperature | |
# files downloaded from http://oceancolor.gsfc.nasa.gov/cgi/l3 | |
# Aqua MODIS Sea Surface temperature 11 u daytime Monthly 9 km SMI images | |
# all .L3m_MO_SST_sst_9km.nc files must be in the working directory | |
# the script will open each nc file, read date, lon, lat and sst data, | |
# then select data from specified area and write them into | |
# a single csv file named MODISA_sst.csv | |
# Some reference pages | |
# http://geog.uoregon.edu/GeogR/topics/netCDF-read-ncdf4.html | |
# https://scottishsnow.wordpress.com/2014/08/24/many-rastered-beast/ | |
# load libraries | |
# ncdf4 needs libnetcdf-dev netcdf-bin in Linux | |
# install.packages(c("ncdf4","reshape2")) | |
library("ncdf4") | |
library("reshape2") | |
# set working directory | |
setwd("/mnt/Dados02/MODIS/SST") # indicate the path to the files | |
file.exists("MODISA_sst.csv") # caution new data will be appended to this file if it already exists | |
# file.rename("MODISA_sst.csv","MODISA_sst.old") | |
# file.remove("MODISA_sst.csv") | |
# list and remove objects | |
ls() | |
rm(list = ls()) | |
# set the study area in decimal degrees | |
lonmax<--40 | |
lonmin<--50 | |
latmax<--22 | |
latmin<--29 | |
# create a list of files and indicate its length | |
(f <- list.files(".", pattern="*.L3m_MO_SST_sst_9km.nc",full.names=F)) | |
(lf<-length(f)) | |
# variable | |
var<-"sst" | |
for (i in 1:lf) { | |
# progress indicator | |
print(paste("Processing file",i,"from",length(f),f[i],sep=" ")) | |
# open netCDF file | |
data<-nc_open(f[i]) | |
# extract data | |
lon<-ncvar_get(data,"lon") | |
lat<-ncvar_get(data,"lat") | |
value<-ncvar_get(data,var) | |
unit<-ncatt_get(data,var,"units")$value | |
# matrix to data.frame | |
dimnames(value)<-list(lon=lon,lat=lat) | |
dat.var<-melt(value,id="lon") | |
# select data from the study area taking out missing data | |
dat.varSAtmp<-subset(dat.var,lon<=lonmax & lon>=lonmin & lat<=latmax & lat>=latmin & value<45) | |
# extract date information | |
dateini<-ncatt_get(data,0,"time_coverage_start")$value | |
dateend<-ncatt_get(data,0,"time_coverage_end")$value | |
datemean<-mean(c(as.Date(dateend,"%Y-%m-%dT%H:%M:%OSZ"),as.Date(dateini,"%Y-%m-%dT%H:%M:%OSZ"))) | |
year<-substring(datemean,0,4) | |
month<-substring(datemean,6,7) | |
# prepare final data set | |
dat.varSA<-data.frame(rep(as.integer(year,nrow(dat.varSAtmp))),rep(as.integer(month,nrow(dat.varSAtmp))), | |
dat.varSAtmp,rep(unit,nrow(dat.varSAtmp)),rep(var,nrow(dat.varSAtmp))) | |
names(dat.varSA)<-c("year","month","lon","lat","value","unit","var") | |
# save csv file | |
fe<-file.exists("MODISA_sst.csv") | |
write.table(dat.varSA,"MODISA_sst.csv",row.names=FALSE,col.names=!fe,sep=";",dec=",",append=fe) | |
# close connection | |
nc_close(data) | |
# clean workspace | |
rm(data,lon,lat,value,unit,dat.var,dat.varSAtmp,dateini,dateend,datemean,year,month,dat.varSA,fe) | |
} | |
rm(var,f,i,latmax,latmin,lf,lonmax,lonmin) |
Hi Nodali20
I downloaded some daily mapped SST images from https://oceancolor.gsfc.nasa.gov/l3/order/ for the period 2019-12-01 to 2020-01-31 (62 files). Download links were like this:
https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/AQUA_MODIS.20191201.L3m.DAY.SST.sst.4km.nc
I made some adjustments to the code to save de day and it ran smoothly. I really could not identify your mistake.
Be sure to delete the MODISA_sst.csv before run the script otherwise the data will always be appended. Also I set the code to decimal points and comma as separator.
Looping became this way
for (i in 1:lf) {
# progress indicator
print(paste("Processing file",i,"from",length(f),f[i],sep=" "))
# open netCDF file
data<-nc_open(f[i])
# extract data
lon<-ncvar_get(data,"lon")
lat<-ncvar_get(data,"lat")
value<-ncvar_get(data,var)
unit<-ncatt_get(data,var,"units")$value
# matrix to data.frame
dimnames(value)<-list(lon=lon,lat=lat)
dat.var<-melt(value,id="lon")
# select data from the study area taking out missing data
dat.varSAtmp<-subset(dat.var,lon<=lonmax & lon>=lonmin & lat<=latmax & lat>=latmin & value<45)
# extract date information
dateini<-ncatt_get(data,0,"time_coverage_start")$value
dateend<-ncatt_get(data,0,"time_coverage_end")$value
datemean<-mean(c(as.Date(dateend,"%Y-%m-%dT%H:%M:%OSZ"),as.Date(dateini,"%Y-%m-%dT%H:%M:%OSZ")))
year<-substring(datemean,0,4)
month<-substring(datemean,6,7)
day<-substring(datemean,9,10) # <<< get the day
# prepare final data set
dat.varSA<-data.frame(rep(as.integer(year,nrow(dat.varSAtmp))),
rep(as.integer(month,nrow(dat.varSAtmp))),
rep(as.integer(day,nrow(dat.varSAtmp))), # <<< insert the day >>>
dat.varSAtmp,
rep(unit,nrow(dat.varSAtmp)),
rep(var,nrow(dat.varSAtmp)))
names(dat.varSA)<-c("year","month","day","lon","lat","value","unit","var") # <<< insert the day >>>
# save csv file
fe<-file.exists("MODISA_sst.csv")
write.table(dat.varSA,"MODISA_sst.csv",row.names=FALSE,col.names=!fe,sep=",",dec=".",append=fe) # <<< change to decimal point >>>
# close connection
nc_close(data)
# clean workspace
rm(data,lon,lat,value,unit,dat.var,dat.varSAtmp,dateini,dateend,datemean,year,month,day,dat.varSA,fe)
}
I got a file with lines like
...
2019,12,31,-40.6041641235352,-28.9791698455811,23.7449994692579,"degree_C","sst"
2019,12,31,-40.5624961853027,-28.9791698455811,23.6949994703755,"degree_C","sst"
2019,12,31,-40.5208282470703,-28.9791698455811,23.6899994704872,"degree_C","sst"
2019,12,31,-40.4791641235352,-28.9791698455811,23.414999476634,"degree_C","sst"
2019,12,31,-40.1458282470703,-28.9791698455811,23.6699994709343,"degree_C","sst"
2019,12,31,-40.1041641235352,-28.9791698455811,24.1849994594231,"degree_C","sst"
2019,12,31,-40.0624961853027,-28.9791698455811,24.1149994609877,"degree_C","sst"
2019,12,31,-40.0208282470703,-28.9791698455811,23.9699994642287,"degree_C","sst"
2020,1,1,-41.4374961853027,-22.0208358764648,28.6249993601814,"degree_C","sst"
2020,1,1,-41.3958282470703,-22.0208358764648,28.6949993586168,"degree_C","sst"
2020,1,1,-41.3541641235352,-22.0208358764648,28.8199993558228,"degree_C","sst"
2020,1,1,-41.3124961853027,-22.0208358764648,28.9299993533641,"degree_C","sst"
2020,1,1,-41.0208282470703,-22.0208358764648,25.0849994393066,"degree_C","sst"
2020,1,1,-40.9791641235352,-22.0208358764648,25.5149994296953,"degree_C","sst"
...
You can try to extract the values of different files without the looping. Just set the i value (like i <- 5) and run lines 45 to 75, then check the csv file. Choose another nc file from another year and month (i <- 312) and run lines 45-75 again and see what happens in the csv file.
I hope you succeed. Let me know what happened.
Thank you very much for your reply Dr. Antonio Silva,
It turns out that the files I downloaded were corrupted.
The code is now running smoothly.
I do appreciate your help and thank you for your code.
Best regards,
Nodali
Hello, Thanks for sharing your script
I try to extract data from several years and compare it with a buoy, so data MODISA vs data buoy, same data period, and frequency of data
I reduce de lonmax, lonmin, latmax, latmin until they are at the same point (pixel). It is an easy way to do this?
I take *sst4.nc data so I change the variable to "sst4"
but the analysis stop in the 3th file (see below)
[1] "Processing file 1 from 355 AQUA_MODIS.20150101_20150108.L3m.8D.SST4.x_sst4.nc"
[1] "Processing file 2 from 355 AQUA_MODIS.20150109_20150116.L3m.8D.SST4.x_sst4.nc"
[1] "Processing file 3 from 355 AQUA_MODIS.20150117_20150124.L3m.8D.SST4.x_sst4.nc"
Error in data.frame(rep(as.integer(year, nrow(dat.varSAtmp))), rep(as.integer(month, :
arguments imply differing number of rows: 1, 0
Could you help me to resolve this, please?
Dear Dr. Antonio Silva,
I have a list of nc files of SST data from 2019 to 2020 (441 files) downloaded from here: https://oceancolor.gsfc.nasa.gov/l3/order/
I tried to use your code and it read all the files. However, it could not extract the data for multiple year, month, and date.
In the file (i.e., MODISA_sst.csv), it only show one year (2019), one month (1). I tried to modify the code by adding this code after line 62 in your code to extract the date:
day <- substring(datemean,9,10)
and then add this following code after line 65, and 67
rep(as.integer(day,nrow(dat.varSAtmp))),
names(dat.varSA)<-c("year","month","day", "lon","lat","value","unit","var", "date")so, it looks like this:
But the results were not as I expected.
What did I miss?
Thank you.