@nishadhka
Created July 4, 2020 02:02
S5P large area tiles and mosaic using HARP and xarray (Python)
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Steps:\n",
"\n",
"0. S5p downloaded using Sentinelsat python API\n",
"1. The images are then grouped into day wise after the datetime of image (end position) is converted into local time (IST) from UTC. Subsequent process are carried out on the day grouped netcdf files.\n",
"2. Area extent is split into four tiles(p1-p4). HARP, harpconvert is applied on to the individual images with each tile extent. \n",
"3. HARP, harpmerge is applied on to the split group of tiles. Assuming day average is happening in this step . \n",
"4. The resultant four tiles are applied with unit conversion (into µmol/m2 ).\n",
"5. Used numpy concatenate to mosaic/join the four tiles into one. Tried to use harpmerge in this steps ends up in multiple error."
]
},
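{
"cell_type": "markdown",
"metadata": {},
"source": [
"Step 1 above (grouping the downloaded files into day-wise folders by IST date) is described but not implemented in the cells below. The next cell is a minimal sketch of one way to do it: the sensing end time is parsed from the standard S5P file name, converted from UTC to IST with pandas, and the file is moved into a folder named after the IST date. The folder layout and the filename parsing are assumptions, not part of the original pipeline."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: group downloaded S5P L2 NO2 files into per-day folders by IST date.\n",
"# The standard S5P file name contains the start and end sensing times (UTC);\n",
"# counting from the end of the underscore-separated name, the sensing end\n",
"# position is the 5th field from the last.\n",
"import glob\n",
"import os\n",
"import shutil\n",
"import pandas as pd\n",
"\n",
"download_dir = '/home/20200604/'  # same folder passed to api.download_all above\n",
"\n",
"for ncfile in glob.glob(os.path.join(download_dir, 'S5P_*_L2__NO2___*.nc')):\n",
"    name = os.path.basename(ncfile)\n",
"    end_utc = name.split('_')[-5]  # sensing end position, e.g. '20200604T070200'\n",
"    end_ist = (pd.to_datetime(end_utc, format='%Y%m%dT%H%M%S')\n",
"               .tz_localize('UTC')\n",
"               .tz_convert('Asia/Kolkata'))\n",
"    day_folder = os.path.join(download_dir, end_ist.strftime('%Y%m%d'))\n",
"    os.makedirs(day_folder, exist_ok=True)\n",
"    shutil.move(ncfile, os.path.join(day_folder, name))"
]
},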
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## to download s5p "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sentinelsat.sentinel import SentinelAPI\n",
"import pandas as pd\n",
"import os\n",
"from pathlib import Path\n",
"import ntpath\n",
"\n",
"\n",
"from sentinelsat.sentinel import SentinelAPI, read_geojson, geojson_to_wkt\n",
"from datetime import date\n",
"\n",
"api=SentinelAPI(user='s5pguest', password='s5pguest', api_url='https://s5phub.copernicus.eu/dhus')\n",
"\n",
"footprint='POLYGON((67.12 7.125, 98.88 38.88, 67.12 38.88,67.12 7.125 ))'\n",
"\n",
"products = api.query(footprint,date=['20200604','20200605'],producttype='L2__NO2___')\n",
"\n",
"products_df = api.to_dataframe(products)\n",
"products_df['key']=products_df.index\n",
"\n",
"keys=products_df.index.tolist()\n",
"\n",
"api.download_all(keys,directory_path='/home/20200604/',checksum=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# to create tiles of an large area extent"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import glob\n",
"import ntpath\n",
"import numpy as np\n",
"import xarray as xr\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def extent_split(plot_extent,resolution):\n",
" yplit=(plot_extent['n']-plot_extent['s'])/2\n",
" xplit=(plot_extent['e']-plot_extent['w'])/2\n",
" p0={'s':plot_extent['s'],'n':plot_extent['s']+yplit+resolution,'w':plot_extent['w'],'e':plot_extent['w']+xplit+resolution}\n",
" p0['ygrid']=round(plot_extent['s']+yplit/resolution)\n",
" p0['xgrid']=round(plot_extent['e']+xplit/resolution)\n",
" p0['s']=p0['s']\n",
" p0['n']=p0['s']+(p0['ygrid']*resolution)+0.1\n",
" p0['e']=p0['w']+(p0['xgrid']*resolution)+0.1\n",
" p0['w']=p0['w']\n",
" p1={'s':plot_extent['s'],'n':plot_extent['s']+yplit+resolution,'w':plot_extent['w']+xplit+resolution,'e':p0['e']+xplit+resolution}\n",
" p1['ygrid']=round(p1['s']+yplit/resolution)\n",
" p1['xgrid']=round(p0['e']+xplit/resolution)\n",
" p1['s']=p1['s']\n",
" p1['n']=p1['s']+(p1['ygrid']*resolution)+0.1\n",
" p1['e']=p1['w']+(p1['xgrid']*resolution)\n",
" p1['w']=p0['e']-0.2\n",
" p2={'s':plot_extent['s']+yplit+resolution,'n':p0['n']+yplit+resolution,'w':plot_extent['w']+xplit+resolution,'e':p0['e']+xplit+resolution}\n",
" p2['ygrid']=p0['ygrid']\n",
" p2['xgrid']=p0['xgrid']\n",
" p2['s']=p0['n']-0.2\n",
" p2['n']=p2['s']+(p2['ygrid']*resolution)\n",
" p2['e']=p1['e']\n",
" p2['w']=p1['w']\n",
" p3={'s':p0['n'],'n':p0['n']+yplit+resolution,'w':plot_extent['w'],'e':plot_extent['w']+xplit+resolution}\n",
" p3['ygrid']=p0['ygrid']\n",
" p3['xgrid']=p0['xgrid']\n",
" p3['s']=p0['n']-0.2\n",
" p3['n']=p2['n']\n",
" p3['e']=p0['e']\n",
" p3['w']=p0['w']\n",
" part={}\n",
" part['p0']=p0\n",
" part['p1']=p1\n",
" part['p2']=p2\n",
" part['p3']=p3\n",
" return part"
]
},
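{
"cell_type": "markdown",
"metadata": {},
"source": [
"The next cell is a small illustrative check (not part of the original pipeline): it prints the bounds and grid size of each tile for the extent that is used at the bottom of this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check of the tiling for the extent used later in this notebook\n",
"example_extent = {'w': 67.12, 's': 7.125, 'n': 38.88, 'e': 98.88}\n",
"for name, tile in extent_split(example_extent, 0.01).items():\n",
"    print(name,\n",
"          'lat', round(tile['s'], 3), 'to', round(tile['n'], 3),\n",
"          'lon', round(tile['w'], 3), 'to', round(tile['e'], 3),\n",
"          'grid', tile['ygrid'], 'x', tile['xgrid'])"
]
},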
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## functions for harp_convert and harp_merge"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"harp_convert_cmdkeep=\"derive(datetime_stop {time}); derive(latitude {latitude}); derive(longitude {longitude});keep(tropospheric_NO2_column_number_density,datetime_stop,latitude,longitude)'\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def harp_convert(folderpath,split_extent,resolution):\n",
" ncfiles=glob.glob(folderpath+'/*.nc')\n",
" print(ncfiles)\n",
" print('total number of ncfiles {}'.format(len(ncfiles)))\n",
" for ncfilepath in ncfiles:\n",
" parts_list=['p0','p1','p2','p3']\n",
" for part in parts_list:\n",
" filename=path_leaf(ncfilepath).split('.')[0]\n",
" newfilename=folderpath+filename+'.nc' \n",
" #cord_extent='latitude>23.005;latitude<38.885;longitude>83;longitude<98.88'\n",
" p0=split_extent[part]\n",
" newfilename=folderpath+filename+'_{}.nc'.format(part)\n",
" cord_extent='latitude>{};latitude<{};longitude>{};longitude<{}'.format(p0['s'],p0['n'],p0['w'],p0['e'])\n",
" #bin_spatial='1588,23.005,resolution,1588,83.0,resolution'\n",
" bin_spatial_up=str(int(p0['ygrid']))+','+str(p0['s'])+','+str(resolution)+','+str(int(p0['xgrid']))+','+str(p0['w'])+','+str(resolution)\n",
" command=\"harpconvert --format netcdf --hdf5-compression 9 -a '{}; tropospheric_NO2_column_number_density_validity>75; bin_spatial({}); \".format(cord_extent,bin_spatial_up)+harp_convert_cmdkeep+ \" {} {}\".format(ncfilepath,newfilename)\n",
" #print(\" {} {}\".format(ncfilepath,newfilename))\n",
" print(command)\n",
" os.system(command)\n",
" \n",
"\n",
"harp_merge_cmdkeep=\"derive(longitude {longitude});derive(latitude {latitude})'\"\n",
"\n",
"\n",
"def harp_merge(date,folderpath,split_extent,resolution):\n",
" ncfolder_loc = Path(folderpath)\n",
" os.chdir(folderpath)\n",
" if ncfolder_loc.exists():\n",
" parts_list=['p0','p1','p2','p3']\n",
" for part in parts_list:\n",
" filename='{}_{}.nc'.format(date,part)\n",
" print(filename)\n",
" newfilename=folderpath+filename\n",
" ncfilepaths='*{}.nc'.format(part)\n",
" #cord_extent='latitude>23.005;latitude<38.885;longitude>83;longitude<98.88'\n",
" p0=split_extent[part]\n",
" cord_extent='latitude>{};latitude<{};longitude>{};longitude<{}'.format(p0['s'],p0['n'],p0['w'],p0['e'])\n",
" #bin_spatial='1588,23.005,resolution,1588,83.0,resolution'\n",
" #bin_spatial_up=str(int(p0['ygrid']))+','+str(p0['s'])+','+str(resolution)+','+str(int(p0['xgrid']))+','+str(p0['w'])+','+str(resolution)\n",
" command=\"harpmerge -ap 'bin(); squash(time, (latitude,longitude))' -a '{}; \".format(cord_extent)+harp_merge_cmdkeep+ \" {} {}\".format(ncfilepaths,newfilename)\n",
" #print(\" {} {}\".format(ncfilepath,newfilename))\n",
" print(command)\n",
" os.system(command)\n",
" else:\n",
" pass"
]
},
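{
"cell_type": "markdown",
"metadata": {},
"source": [
"The functions above shell out to the harpconvert/harpmerge command line tools via os.system. As an alternative (a sketch only, not what the rest of this notebook uses), the same per-tile convert step can be expressed with HARP's Python bindings, assuming the harp Python package is installed alongside the command line tools."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Alternative sketch using the HARP Python bindings instead of os.system + harpconvert.\n",
"# The operation string mirrors the one built in harp_convert above.\n",
"import harp\n",
"\n",
"\n",
"def harp_convert_py(ncfilepath, newfilename, tile, resolution):\n",
"    operations = (\n",
"        'latitude>{s};latitude<{n};longitude>{w};longitude<{e};'\n",
"        'tropospheric_NO2_column_number_density_validity>75;'\n",
"        'bin_spatial({ny},{s},{res},{nx},{w},{res});'\n",
"        'derive(datetime_stop {{time}});derive(latitude {{latitude}});derive(longitude {{longitude}});'\n",
"        'keep(tropospheric_NO2_column_number_density,datetime_stop,latitude,longitude)'\n",
"    ).format(s=tile['s'], n=tile['n'], w=tile['w'], e=tile['e'],\n",
"             ny=int(tile['ygrid']), nx=int(tile['xgrid']), res=resolution)\n",
"    product = harp.import_product(ncfilepath, operations=operations)\n",
"    harp.export_product(product, newfilename, file_format='netcdf')"
]
},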
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## unit conversion function"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def s5p_micro_mol_convertor(date,folderpath):\n",
" ncfolder_loc = Path(folderpath)\n",
" os.chdir(folderpath)\n",
" if ncfolder_loc.exists():\n",
" parts_list=['p0','p1','p2','p3']\n",
" for part in parts_list:\n",
" #xt1,xt2,xt3,xt4,xt5=[],[],[],[],[]\n",
" filename='{}_{}.nc'.format(date,part)\n",
" ncfilename=folderpath+filename\n",
" ncfilename_loc = Path(ncfilename)\n",
" newfilename=folderpath+'{}_{}_inm.nc'.format(date,part)\n",
" if ncfilename_loc.exists():\n",
" xt1=xr.open_dataset(ncfilename,decode_times=False)\n",
" tyu=xt1['tropospheric_NO2_column_number_density']*1000000\n",
" print(tyu)\n",
" tyu.to_netcdf(newfilename)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## join the tiles into mosaic function, using numpy concatenate"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def productname_format(productname):\n",
" newproductname0=productname.split('_')\n",
" newproductname1 = filter(None, newproductname0)\n",
" newproductname='_'.join(newproductname1)\n",
" return newproductname\n",
"\n",
"\n",
"def part_merge(date,folderpath,productname,resolution):\n",
" newproductname=productname_format(productname)\n",
" datefolder=folderpath\n",
" ncfolder_loc = Path(datefolder)\n",
" if ncfolder_loc.exists():\n",
" os.chdir(datefolder)\n",
" p0=datefolder+'{}_{}_inm.nc'.format(date,'p0')\n",
" p0_loc=Path(p0)\n",
" if p0_loc.exists():\n",
" xp0=xr.open_dataset(p0,decode_times=False)\n",
" p1=datefolder+'{}_{}_inm.nc'.format(date,'p1')\n",
" p1_loc=Path(p1)\n",
" if p1_loc.exists():\n",
" xp1=xr.open_dataset(p1,decode_times=False)\n",
" xp1a=xp1.sel(longitude=slice(xp0.longitude.max()+resolution,xp1.longitude.max()))\n",
" p2=datefolder+'{}_{}_inm.nc'.format(date,'p2')\n",
" p2_loc=Path(p2)\n",
" if p2_loc.exists():\n",
" xp2=xr.open_dataset(p2,decode_times=False)\n",
" xp2a=xp2.sel(longitude=slice(xp0.longitude.max()+resolution,xp2.longitude.max()))\n",
" xp2b=xp2a.sel(latitude=slice(xp0.latitude.max()+resolution,xp2.latitude.max()))\n",
" p3=datefolder+'{}_{}_inm.nc'.format(date,'p3')\n",
" p3_loc=Path(p3)\n",
" if p3_loc.exists():\n",
" xp3=xr.open_dataset(p3,decode_times=False)\n",
" xp3a=xp3.sel(latitude=slice(xp0.latitude.max()+resolution,xp3.latitude.max()))\n",
" #xp12=xr.combine_by_coords([xp0,xp1a,xp2b,xp3a])\n",
" #xp12a=xp12.drop_vars('datetime_stop')\n",
" newlong=np.concatenate([xp0.longitude.values,xp1a.longitude.values])\n",
" newlat=np.concatenate([xp0.latitude.values,xp3a.latitude.values])\n",
" p0=xp0.tropospheric_NO2_column_number_density.values\n",
" p1=xp1a.tropospheric_NO2_column_number_density.values\n",
" p01=np.concatenate((p0[0],p1[0]), axis=1)\n",
" p3=xp3a.tropospheric_NO2_column_number_density.values\n",
" p2=xp2b.tropospheric_NO2_column_number_density.values\n",
" p32=np.concatenate((p3[0],p2[0]), axis=1)\n",
" all_p=np.concatenate((p01,p32), axis=0)\n",
" all_p3d=all_p.reshape(1,all_p.shape[0],all_p.shape[1])\n",
" ds = xr.Dataset({'tropospheric_NO2_column_number_density': (['time', 'latitude', 'longitude'], all_p3d )},\n",
" coords={'latitude': (newlat),\n",
" 'longitude': (newlong)})\n",
" ds.attrs['date'] = date\n",
" ds.attrs['units'] = 'daily average in µmol/m2'\n",
" newproductname=productname_format(productname)\n",
" ds.to_netcdf(datefolder+'{}_{}.nc'.format(date,newproductname))\n",
" else:\n",
" pass"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## applying the functions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"resolution=0.01\n",
"\n",
"plot_extent={'e':67.12,'n':38.88,'s':7.125,'w':98.88,}\n",
"\n",
"productname='L2__NO2___'\n",
"\n",
"date=20200604\n",
"\n",
"ncfilefolderpath='/home/20200604/'\n",
"\n",
"newfilename='newfilename'\n",
"\n",
"split_extent=extent_split(plot_extent,resolution)\n",
"\n",
"harp_convert(ncfilefolderpath,split_extent,resolution)\n",
"\n",
"harp_merge(date,ncfilefolderpath,split_extent,resolution)\n",
"\n",
"s5p_micro_mol_convertor(date,ncfilefolderpath)\n",
"\n",
"part_merge(date,ncfilefolderpath,productname,resolution)"
]
},
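{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once the pipeline above has run, the mosaicked daily file written by part_merge (named {date}_{formatted product name}.nc, i.e. 20200604_L2_NO2.nc for the values used above) can be opened and plotted with xarray. This cell is only a quick inspection sketch and assumes matplotlib is available."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick inspection of the mosaicked daily product written by part_merge above\n",
"import matplotlib.pyplot as plt\n",
"\n",
"mosaic_file = ncfilefolderpath + '{}_{}.nc'.format(date, productname_format(productname))\n",
"mosaic = xr.open_dataset(mosaic_file, decode_times=False)\n",
"mosaic['tropospheric_NO2_column_number_density'].isel(time=0).plot()\n",
"plt.title('S5P tropospheric NO2 daily average, {}'.format(date))\n",
"plt.show()"
]
}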
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}