Created
January 24, 2019 16:15
-
-
Save edobez/749a117cb3e66d40e3386393f3e86dfe to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from pandas import Series, DataFrame | |
from netCDF4 import Dataset | |
import netCDF4 as nc4 | |
import os | |
import gdal | |
TILE_PARAMS = { | |
'h17v04': (50, 40, -15.557, 0.013, 0.001), | |
'h17v05': (40, 30, -13.054, 0.011, 0.001) | |
} | |
def compute_band_value(refl, coord, tile_params, tile_size=2400): | |
""" | |
Compute band value at specified coordinates | |
:param refl: Handle to object open with gdal.Open | |
:param coord: 2-tuple with coordinate values | |
:param tile_params: 5-tuple with tile parameters | |
:param tile_size: size of tile (default = 2400) | |
:return: band value | |
""" | |
lon_refl, lat_refl = coord | |
lat1, lat0, lon1, lon0, res = tile_params | |
# define borders of final file that I want | |
wgs84_wkt = 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.01745329251994328,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]' | |
# create dimensions final file | |
x_size = int((lon1 - lon0)/(-1*res)) | |
y_size = int((lat1 - lat0)/(res)) | |
lats = np.linspace(lat1, lat0-res, num=y_size) | |
lats = np.around(lats[:], decimals=3) | |
lons = np.linspace(lon1, lon0-res, num=x_size) | |
lons = np.around(lons[:], decimals=3) | |
# define geotrasformation | |
geot = [lon1, res, 0., lat1, 0., -1*res] | |
# create temporary (that's why 'MEM') file of same size of final file, filled with array of null raster values (-9999.9) | |
dst = gdal.GetDriverByName('MEM').Create( | |
'', x_size, y_size, 1, gdal.GDT_Float32,) # last one is the data type | |
dst.GetRasterBand(1).WriteArray( | |
np.ones((y_size, x_size), dtype=np.float32) * -9999.9) | |
dst.SetGeoTransform(geot) | |
dst.SetProjection(wgs84_wkt) | |
# create temporary file of signle band equal to var of interest from starting netcdf (the tile) | |
src = gdal.GetDriverByName('MEM').Create( | |
'', tile_size, tile_size, 1, gdal.GDT_Float32,) | |
tile = refl | |
band = tile.GetRasterBand(1).ReadAsArray() | |
src.GetRasterBand(1).WriteArray(band) | |
src.SetGeoTransform(tile.GetGeoTransform()) | |
src.SetProjection(tile.GetProjection()) | |
gdal.ReprojectImage(src, dst, src.GetProjection( | |
), dst.GetProjection(), gdal.GRA_NearestNeighbour) | |
idx_dst_x = np.where(lons == lon_refl)[0][0]-1 | |
idx_dst_y = np.where(lats == lat_refl)[0][0]-1 | |
return dst.ReadAsArray()[idx_dst_y, idx_dst_x] | |
def extract_data_from_file(filepath, df): | |
""" | |
Extract band values from file, on the coordinates specified in the input dataframe. | |
Returns a dataframe of the same type of the input one, but filled with data. | |
:param filepath: complete path of the file to be processed | |
:param df: pandas DataFrame containing only rows of specified tile | |
:return: As input df but filled with data | |
""" | |
ds = gdal.Open(filepath) | |
sub_ds = ds.GetSubDatasets() | |
# Create a list of names: [refl1, refl2, ..., refl7] | |
bands = ['refl{}'.format(n+1) for n in range(7)] | |
# Foreach band | |
for band_idx, band_name in enumerate(bands): | |
band_refl = gdal.Open(sub_ds[band_idx+7][0]) | |
# Foreach coordinate | |
for c_idx in df.index: | |
tile_params = TILE_PARAMS[df['tile']] | |
coord = (round(df['Lon_WGS84'], 3), round(df['Lat_WGS84'], 3)) | |
df.at[c_idx, band_name] = compute_band_value( | |
band_refl, coord, tile_params) # Fill dataframe with data | |
return df | |
def find_tile_file(tile_name, folder_path): | |
""" | |
Find file relative to tile inside specified folder | |
:return: Complete path of file containing the string 'tile_name' | |
""" | |
for file in os.listdir(folder_path): | |
if tile_name in file: | |
return os.path.abspath(file) | |
coordinate_filepath = '/g/data1a/xc0/user/scortechini/reflectance/alltogether.csv' | |
coordinate_header = ['ID', 'X', 'Y', 'Lat_WGS84', 'Lon_WGS84', 'tile', | |
'refl1', 'refl2', 'refl3', 'refl4', 'refl5', 'refl6', 'refl7'] | |
coordinates = pd.read_csv(coordinate_filepath, names=coordinate_header) | |
data_folder = '/g/data2/u39/public/data/modis/lpdaac-tiles-c6/MCD43A4.006' | |
# Foreach date | |
dataframes_over_dates = [] | |
for folder in os.listdir(data_folder): | |
# Group over tiles and interate | |
dataframes_over_tiles = [] | |
for tile_name, group in coordinates.groupby('tile'): | |
tile_filepath = find_tile_file(tile_name, folder) | |
tile_dataframe = extract_data_from_file(tile_filepath, group) | |
dataframes_over_tiles.append(tile_dataframe) | |
# Concatenate dataframe of different tiles together and add to list | |
concatenated = pd.concat(dataframes_over_tiles).sort_index() | |
dataframes_over_dates.append(concatenated) | |
# Now you have a list of DataFrames. Each DF is relative to one folder/date and has the same structure of the original `coordinates` DF. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment