Last active
July 1, 2020 09:54
-
-
Save loicdtx/8dfd69b0c5d09e13a6b5011b9aa10e9b to your computer and use it in GitHub Desktop.
ocean color binning with numpy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import netCDF4 as nc | |
import numpy as np | |
from pyproj import Proj | |
from affine import Affine | |
import rasterio | |
from rasterio.crs import CRS | |
from glob import glob | |
from math import floor | |
def bin_map_numpy(file_list, variable, south, north, west, east, resolution, proj4string, filename): | |
"""Performs binning and mapping of a list of level 2 Ocean color files | |
Attempt to reproduce the behaviour of seadas l2bin + l3mapgen | |
Args: | |
file_list (list): List of input (level 2) file names | |
variable (str): Geophysical variable to process (Must be contained in the files under the 'geophysical_data' group) | |
south (float): Southern border of output extent (in DD) | |
north (float): Northern border of output extent (in DD) | |
west (float): Western border of output extent (in DD) | |
east (float): Eastern border of output extent (in DD) | |
resolution (float): Output resolution (in the unit of the output coordinate reference system) | |
proj4string (str): Coordinate reference system of the output in proj4 format | |
filename (str): Output filename (only tif is supported at the moment) | |
Return: | |
np.array: The array with binned values | |
""" | |
lon_dd = np.array([]) | |
lat_dd = np.array([]) | |
var = np.array([]) | |
# Ingest all the L2 data their coordinates in a flattened form | |
for file in file_list: | |
with nc.Dataset(file) as src: | |
lon_dd = np.append(lon_dd, src.groups['navigation_data'].variables['longitude'][:].flatten()) | |
lat_dd = np.append(lat_dd, src.groups['navigation_data'].variables['latitude'][:].flatten()) | |
var = np.append(var, src.groups['geophysical_data'].variables[variable][:].flatten()) | |
# TODO: Include the flags as well and perform cleaning | |
# Instantiate proj object | |
p = Proj(proj4string) | |
# Find output corners coordinates in output CRS | |
# TODO: This does not account for the extent deformation induced by the projection | |
top_left = p(west, north) | |
bottom_right = p(east, south) | |
# Define output array shape (nrow, ncol) | |
destination_shape = ( int( abs(top_left[1] - bottom_right[1]) / float(resolution)), int( abs(top_left[0] - bottom_right[0]) / float(resolution)) ) | |
# Define affine transform and the inverse transform | |
aff = Affine(resolution, 0.0, top_left[0], 0.0, -resolution, top_left[1]) | |
ffa = ~aff | |
# Convert the lat and lon arrays to projected coordinates | |
lon_proj, lat_proj = p(lon_dd, lat_dd) | |
# Convert projected coordinates to destination array indices | |
destination_ids = ffa * (lon_proj, lat_proj) | |
# Perform 2d data binning (to average all input coordinates falling withing the same outut pixel) | |
# 1 Sum up within each bin | |
dst_array, _, _ = np.histogram2d(destination_ids[1], destination_ids[0], bins=(range(0, destination_shape[0] + 1, 1), range(0, destination_shape[1] + 1, 1)), weights=var, normed=False) | |
# Retrieve count per bin | |
counts, _, _ = np.histogram2d(destination_ids[1], destination_ids[0], bins=(range(0, destination_shape[0] + 1, 1), range(0, destination_shape[1] + 1, 1))) | |
# Compute average value per bin | |
dst_array = dst_array / counts | |
dst_array = np.ma.masked_invalid(dst_array) | |
# Prepare output file metadata | |
geo_dict = {'crs': CRS.from_string(proj4string), | |
'affine': aff, | |
'height': destination_shape[0], | |
'width': destination_shape[1], | |
'driver': u'GTiff', | |
'dtype': rasterio.float32, # TODO: Better control datatype | |
'count': 1, | |
'nodata': 0} | |
# write array to file | |
with rasterio.open(filename, 'w', **geo_dict) as dst: | |
dst.write_band(1, dst_array.astype(rasterio.float32)) | |
return dst_array | |
# Function to pass to argparse | |
def main(file_list, variable, south, north, west, east, resolution, proj4string, filename, plot = False, vmax = 10): | |
array = bin_map_numpy(file_list, variable, south, north, west, east, resolution, proj4string, filename) | |
if plot: | |
import matplotlib.pyplot as plt | |
from matplotlib.colors import LogNorm | |
plt.imshow(array, interpolation = "none", norm=LogNorm(vmax = vmax), cmap = 'jet') | |
plt.colorbar() | |
plt.show() | |
if __name__ == '__main__': | |
epilog = ('Experimental CLI to bin L2 ocean data using numpy\n\n' | |
'------------\n' | |
'Example usage:\n' | |
'------------\n\n' | |
'# Download some test data (around Mexico)\n' | |
'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007174000.L2_LAC_OC.nc\n' | |
'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007174500.L2_LAC_OC.nc\n' | |
'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007191500.L2_LAC_OC.nc\n' | |
'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007192000.L2_LAC_OC.nc\n' | |
'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007192500.L2_LAC_OC.nc\n' | |
'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007205500.L2_LAC_OC.nc\n' | |
'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007210000.L2_LAC_OC.nc\n' | |
'wget https://oceandata.sci.gsfc.nasa.gov/cgi/getfile/A2016007210500.L2_LAC_OC.nc\n\n' | |
'# Generate binned file in Lambert Azimuthal Equal Area at 2 km resolution\n' | |
'./geo_bin.py -infiles A2016007*.L2_LAC_OC.nc -var chlor_a -north 33 -south 3 -west -122 -east -72 ' | |
'-res 2000 -proj "+proj=laea +lat_0=18 +lon_0=-100" -filename A2016007.L3m_DAY_CHL_chlor_a_2km.tif\n\n' | |
'# If you have matplotlib installed, you can visualized the file created\n' | |
'./geo_bin.py -infiles A2016007*.L2_LAC_OC.nc -var chlor_a -north 33 -south 3 -west -122 -east -72 ' | |
'-res 2000 -proj "+proj=laea +lat_0=18 +lon_0=-100" -filename A2016007.L3m_DAY_CHL_chlor_a_2km.tif --plot\n\n' | |
'\n ') | |
parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter) | |
parser.add_argument('-infiles', '--file_list', nargs='+', | |
help = 'Input L2 files') | |
parser.add_argument('-var', '--variable', | |
required = True, | |
help = 'L3m variable') | |
parser.add_argument('-north', '--north', | |
type = float, | |
required = True, | |
help = 'Northern boundary in DD') | |
parser.add_argument('-south', '--south', | |
type = float, | |
required = True, | |
help = 'Southern boundary in DD') | |
parser.add_argument('-east', '--east', | |
type = float, | |
required = True, | |
help = 'Eastern most boundary in DD') | |
parser.add_argument('-west', '--west', | |
type = float, | |
required = True, | |
help = 'Western most boundary in DD') | |
parser.add_argument('-res', '--resolution', | |
type = float, | |
required = True, | |
help = 'Output resolution in CRS unit') | |
parser.add_argument('-proj', '--proj4string', | |
required = True, | |
help = 'Output CRS in proj4 format (Quotted)') | |
parser.add_argument('-filename', '--filename', | |
required = True, | |
help = 'Output (tif) filename') | |
parser.add_argument('--plot', action='store_true', | |
help = 'plot the output with matplotlib') | |
parser.set_defaults(plot=False) | |
parser.add_argument('-vmax', '--vmax', | |
help = 'if --plot, maximum value used for color scaling') | |
parser.set_defaults(vmax=10) | |
parsed_args = parser.parse_args() | |
main(**vars(parsed_args)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for taking your time to answer even though you don't work on this code anymore, really appreciated !
It's great that your code is public so people can use it and/or get inspiration from it.
I keep writing mostly for people who are interested in this binning topic and might come across this web page.
About the difference between your implementation and the "fancier" ones, I have quickly checked out this document that was referenced in this post of yours, and although it is mentioned that
it seems that MODIS itself uses a simple arithmetic average (or linear average), however different average methods (listed in the document) show a relative difference of up to 20%.