Skip to content

Instantly share code, notes, and snippets.

@edobez
Created January 24, 2019 16:15
Show Gist options
  • Save edobez/749a117cb3e66d40e3386393f3e86dfe to your computer and use it in GitHub Desktop.
Save edobez/749a117cb3e66d40e3386393f3e86dfe to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from netCDF4 import Dataset
import netCDF4 as nc4
import os
import gdal
TILE_PARAMS = {
'h17v04': (50, 40, -15.557, 0.013, 0.001),
'h17v05': (40, 30, -13.054, 0.011, 0.001)
}
def compute_band_value(refl, coord, tile_params, tile_size=2400):
"""
Compute band value at specified coordinates
:param refl: Handle to object open with gdal.Open
:param coord: 2-tuple with coordinate values
:param tile_params: 5-tuple with tile parameters
:param tile_size: size of tile (default = 2400)
:return: band value
"""
lon_refl, lat_refl = coord
lat1, lat0, lon1, lon0, res = tile_params
# define borders of final file that I want
wgs84_wkt = 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.01745329251994328,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]'
# create dimensions final file
x_size = int((lon1 - lon0)/(-1*res))
y_size = int((lat1 - lat0)/(res))
lats = np.linspace(lat1, lat0-res, num=y_size)
lats = np.around(lats[:], decimals=3)
lons = np.linspace(lon1, lon0-res, num=x_size)
lons = np.around(lons[:], decimals=3)
# define geotrasformation
geot = [lon1, res, 0., lat1, 0., -1*res]
# create temporary (that's why 'MEM') file of same size of final file, filled with array of null raster values (-9999.9)
dst = gdal.GetDriverByName('MEM').Create(
'', x_size, y_size, 1, gdal.GDT_Float32,) # last one is the data type
dst.GetRasterBand(1).WriteArray(
np.ones((y_size, x_size), dtype=np.float32) * -9999.9)
dst.SetGeoTransform(geot)
dst.SetProjection(wgs84_wkt)
# create temporary file of signle band equal to var of interest from starting netcdf (the tile)
src = gdal.GetDriverByName('MEM').Create(
'', tile_size, tile_size, 1, gdal.GDT_Float32,)
tile = refl
band = tile.GetRasterBand(1).ReadAsArray()
src.GetRasterBand(1).WriteArray(band)
src.SetGeoTransform(tile.GetGeoTransform())
src.SetProjection(tile.GetProjection())
gdal.ReprojectImage(src, dst, src.GetProjection(
), dst.GetProjection(), gdal.GRA_NearestNeighbour)
idx_dst_x = np.where(lons == lon_refl)[0][0]-1
idx_dst_y = np.where(lats == lat_refl)[0][0]-1
return dst.ReadAsArray()[idx_dst_y, idx_dst_x]
def extract_data_from_file(filepath, df):
"""
Extract band values from file, on the coordinates specified in the input dataframe.
Returns a dataframe of the same type of the input one, but filled with data.
:param filepath: complete path of the file to be processed
:param df: pandas DataFrame containing only rows of specified tile
:return: As input df but filled with data
"""
ds = gdal.Open(filepath)
sub_ds = ds.GetSubDatasets()
# Create a list of names: [refl1, refl2, ..., refl7]
bands = ['refl{}'.format(n+1) for n in range(7)]
# Foreach band
for band_idx, band_name in enumerate(bands):
band_refl = gdal.Open(sub_ds[band_idx+7][0])
# Foreach coordinate
for c_idx in df.index:
tile_params = TILE_PARAMS[df['tile']]
coord = (round(df['Lon_WGS84'], 3), round(df['Lat_WGS84'], 3))
df.at[c_idx, band_name] = compute_band_value(
band_refl, coord, tile_params) # Fill dataframe with data
return df
def find_tile_file(tile_name, folder_path):
"""
Find file relative to tile inside specified folder
:return: Complete path of file containing the string 'tile_name'
"""
for file in os.listdir(folder_path):
if tile_name in file:
return os.path.abspath(file)
coordinate_filepath = '/g/data1a/xc0/user/scortechini/reflectance/alltogether.csv'
coordinate_header = ['ID', 'X', 'Y', 'Lat_WGS84', 'Lon_WGS84', 'tile',
'refl1', 'refl2', 'refl3', 'refl4', 'refl5', 'refl6', 'refl7']
coordinates = pd.read_csv(coordinate_filepath, names=coordinate_header)
data_folder = '/g/data2/u39/public/data/modis/lpdaac-tiles-c6/MCD43A4.006'
# Foreach date
dataframes_over_dates = []
for folder in os.listdir(data_folder):
# Group over tiles and interate
dataframes_over_tiles = []
for tile_name, group in coordinates.groupby('tile'):
tile_filepath = find_tile_file(tile_name, folder)
tile_dataframe = extract_data_from_file(tile_filepath, group)
dataframes_over_tiles.append(tile_dataframe)
# Concatenate dataframe of different tiles together and add to list
concatenated = pd.concat(dataframes_over_tiles).sort_index()
dataframes_over_dates.append(concatenated)
# Now you have a list of DataFrames. Each DF is relative to one folder/date and has the same structure of the original `coordinates` DF.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment