-
-
Save loicdtx/8dfd69b0c5d09e13a6b5011b9aa10e9b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
import argparse | |
import netCDF4 as nc | |
import numpy as np | |
from pyproj import Proj | |
from affine import Affine | |
import rasterio | |
from rasterio.crs import CRS | |
from glob import glob | |
from math import floor | |
def bin_map_numpy(file_list, variable, south, north, west, east, resolution, proj4string, filename):
    """Perform binning and mapping of a list of level 2 Ocean color files.

    Attempts to reproduce the behaviour of the seadas l2bin + l3mapgen commands.

    Args:
        file_list (list): List of input (level 2) file names
        variable (str): Geophysical variable to process (must be contained in
            the files under the 'geophysical_data' group)
        south (float): Southern border of output extent (in DD)
        north (float): Northern border of output extent (in DD)
        west (float): Western border of output extent (in DD)
        east (float): Eastern border of output extent (in DD)
        resolution (float): Output resolution (in the unit of the output
            coordinate reference system)
        proj4string (str): Coordinate reference system of the output in proj4 format
        filename (str): Output filename (only tif is supported at the moment)

    Return:
        numpy.ma.MaskedArray: The array with binned values, masked where no
        observation fell within a bin.
    """
    lon_dd = np.array([])
    lat_dd = np.array([])
    var = np.array([])
    # Ingest all the L2 data and their coordinates in a flattened form
    for file in file_list:
        with nc.Dataset(file) as src:
            lon_dd = np.append(lon_dd, src.groups['navigation_data'].variables['longitude'][:].flatten())
            lat_dd = np.append(lat_dd, src.groups['navigation_data'].variables['latitude'][:].flatten())
            var = np.append(var, src.groups['geophysical_data'].variables[variable][:].flatten())
    # TODO: Include the flags as well and perform cleaning
    # Instantiate proj object
    p = Proj(proj4string)
    # Find output corner coordinates in the output CRS
    # TODO: This does not account for the extent deformation induced by the projection
    top_left = p(west, north)
    bottom_right = p(east, south)
    # Define output array shape (nrow, ncol)
    destination_shape = (int(abs(top_left[1] - bottom_right[1]) / float(resolution)),
                         int(abs(top_left[0] - bottom_right[0]) / float(resolution)))
    # Define the affine transform and its inverse (projected coords -> array indices)
    aff = Affine(resolution, 0.0, top_left[0], 0.0, -resolution, top_left[1])
    ffa = ~aff
    # Convert the lat and lon arrays to projected coordinates
    lon_proj, lat_proj = p(lon_dd, lat_dd)
    # Convert projected coordinates to destination array indices
    destination_ids = ffa * (lon_proj, lat_proj)
    # Perform 2d data binning (to average all input values falling within the
    # same output pixel)
    bins = (range(0, destination_shape[0] + 1, 1), range(0, destination_shape[1] + 1, 1))
    # 1 Sum up the variable values within each bin
    # NOTE: the 'normed' keyword was deprecated and removed from
    # np.histogram2d (gone in numpy 1.24); omitting it yields the same
    # un-normalized sums as normed=False did.
    dst_array, _, _ = np.histogram2d(destination_ids[1], destination_ids[0], bins=bins, weights=var)
    # 2 Retrieve the observation count per bin
    counts, _, _ = np.histogram2d(destination_ids[1], destination_ids[0], bins=bins)
    # 3 Compute the average value per bin. Empty bins (count == 0) produce
    # nan/inf which are masked immediately after, so silence the warnings.
    with np.errstate(invalid='ignore', divide='ignore'):
        dst_array = dst_array / counts
    dst_array = np.ma.masked_invalid(dst_array)
    # Prepare output file metadata
    # NOTE: the 'affine' profile key is deprecated in rasterio >= 1.0;
    # 'transform' now takes an Affine object directly.
    geo_dict = {'crs': CRS.from_string(proj4string),
                'transform': aff,
                'height': destination_shape[0],
                'width': destination_shape[1],
                'driver': u'GTiff',
                'dtype': rasterio.float32,  # TODO: Better control datatype
                'count': 1,
                'nodata': 0}
    # write array to file
    with rasterio.open(filename, 'w', **geo_dict) as dst:
        dst.write_band(1, dst_array.astype(rasterio.float32))
    return dst_array
# Function to pass to argparse | |
def main(file_list, variable, south, north, west, east, resolution, proj4string, filename, plot = False, vmax = 10):
    """Bin the input L2 files to a georeferenced raster and optionally plot it.

    Thin wrapper around bin_map_numpy() intended to be driven by argparse.
    matplotlib is imported lazily so that plotting remains optional.
    """
    binned = bin_map_numpy(file_list, variable, south, north, west, east,
                           resolution, proj4string, filename)
    if not plot:
        return
    import matplotlib.pyplot as plt
    from matplotlib.colors import LogNorm
    plt.imshow(binned, interpolation = "none", norm=LogNorm(vmax = vmax), cmap = 'jet')
    plt.colorbar()
    plt.show()
if __name__ == '__main__':
    epilog = ('Experimental CLI to bin L2 ocean data using numpy\n\n'
              '------------\n'
              'Example usage:\n'
              '------------\n\n'
              '# Download some test data (around Mexico)\n'
              'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007174000.L2_LAC_OC.nc\n'
              'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007174500.L2_LAC_OC.nc\n'
              'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007191500.L2_LAC_OC.nc\n'
              'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007192000.L2_LAC_OC.nc\n'
              'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007192500.L2_LAC_OC.nc\n'
              'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007205500.L2_LAC_OC.nc\n'
              'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007210000.L2_LAC_OC.nc\n'
              'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007210500.L2_LAC_OC.nc\n\n'
              '# Generate binned file in Lambert Azimuthal Equal Area at 2 km resolution\n'
              './geo_bin.py -infiles A2016007*.L2_LAC_OC.nc -var chlor_a -north 33 -south 3 -west -122 -east -72 '
              '-res 2000 -proj "+proj=laea +lat_0=18 +lon_0=-100" -filename A2016007.L3m_DAY_CHL_chlor_a_2km.tif\n\n'
              '# If you have matplotlib installed, you can visualized the file created\n'
              './geo_bin.py -infiles A2016007*.L2_LAC_OC.nc -var chlor_a -north 33 -south 3 -west -122 -east -72 '
              '-res 2000 -proj "+proj=laea +lat_0=18 +lon_0=-100" -filename A2016007.L3m_DAY_CHL_chlor_a_2km.tif --plot\n\n'
              '\n ')
    parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-infiles', '--file_list', nargs='+',
                        help = 'Input L2 files')
    parser.add_argument('-var', '--variable',
                        required = True,
                        help = 'L3m variable')
    parser.add_argument('-north', '--north',
                        type = float,
                        required = True,
                        help = 'Northern boundary in DD')
    parser.add_argument('-south', '--south',
                        type = float,
                        required = True,
                        help = 'Southern boundary in DD')
    parser.add_argument('-east', '--east',
                        type = float,
                        required = True,
                        help = 'Eastern most boundary in DD')
    parser.add_argument('-west', '--west',
                        type = float,
                        required = True,
                        help = 'Western most boundary in DD')
    parser.add_argument('-res', '--resolution',
                        type = float,
                        required = True,
                        help = 'Output resolution in CRS unit')
    parser.add_argument('-proj', '--proj4string',
                        required = True,
                        help = 'Output CRS in proj4 format (Quotted)')
    parser.add_argument('-filename', '--filename',
                        required = True,
                        help = 'Output (tif) filename')
    parser.add_argument('--plot', action='store_true',
                        help = 'plot the output with matplotlib')
    parser.set_defaults(plot=False)
    # BUGFIX: without type=float the CLI value reached LogNorm(vmax=...) as a
    # string; the programmatic default (10) stays an int, which is fine.
    parser.add_argument('-vmax', '--vmax',
                        type = float,
                        help = 'if --plot, maximum value used for color scaling')
    parser.set_defaults(vmax=10)
    parsed_args = parser.parse_args()
    main(**vars(parsed_args))
Hi @gcaria,
Sorry, I won't be able to help much with your request; I no longer work on this project, nor am I involved in this field of research anymore. From what I remember, the version in the gist is a pure Python/NumPy implementation; it is much faster than seadas l2bin but also less meticulous (e.g. in how values are averaged, etc.). The differences are probably small, but if you care about precision or want to do quantitative analysis, you're better off using the seadas command line directly, or a Python wrapper that calls the command line. I had written such wrappers here.
Cheers
Thanks for taking the time to answer even though you don't work on this code anymore — really appreciated!
It's great that your code is public so people can use it and/or get inspiration from it.
I keep writing mostly for people who are interested in this binning topic and might come across this web page.
About the difference between your implementation and the "fancier" ones, I have quickly checked out this document that was referenced in this post of yours, and although it is mentioned that
Binning algorithms employed by the various ocean-colour missions differ in several ways. For instance, they use different spatial and temporal bin sizes, calculate statistics of binned data in different ways, and use different schemes for
weighting the data in each bin. In addition, the criteria used to screen data of lower quality, and even the definition of a “data day", may be different.
it seems that MODIS itself uses a simple arithmetic average (or linear average); however, the different averaging methods (listed in the document) show a relative difference of up to 20%.
Hi Loïc,
First of all, impressive work! And thanks for sharing this code.
Question:
You flattened the geophysical data read from netcdf4 files.
var = np.append(var, src.groups['geophysical_data'].variables[variable][:].flatten())
Do you have any concern that this may slow down the process? I saw on stackoverflow people complaining about reduced speed due to flattening. See link below.
http://stackoverflow.com/questions/21740324/fastest-way-to-get-netcdf-variable-min-max-using-python
-mike