Discharge grouped by Mouginot 2019 regions or Zwally 2012 sectors does not agree (discovered in TMB product)
Here, reproduce the issue and fix the bug.
Note that in this product, sectors and regions both come from Mouginot 2019, while in the TMB product, sectors come from Zwally 2012.
- v40 (and earlier) exhibits bug
- https://dataverse01.geus.dk/dataset.xhtml?persistentId=doi:10.22008/promice/data/ice_discharge/d/v02&version=40.0
wget "https://dataverse01.geus.dk/api/access/datafile/:persistentId?persistentId=doi:10.22008/promice/data/ice_discharge/d/v02/DFDD7I" -O ~/tmp/gates.nc
wget 'https://dataverse01.geus.dk/api/access/datafile/:persistentId?persistentId=doi:10.22008/promice/data/ice_discharge/d/v02/UC6DJM' -O ~/tmp/regions.nc
wget 'https://dataverse01.geus.dk/api/access/datafile/:persistentId?persistentId=doi:10.22008/promice/data/ice_discharge/d/v02/ZFL44G' -O ~/tmp/sectors.nc
import xarray as xr
g = xr.open_dataset('~/tmp/gates.nc')
r = xr.open_dataset('~/tmp/regions.nc')
s = xr.open_dataset('~/tmp/sectors.nc')
print('S - R', (s.sum(dim='sector') - r.sum(dim='region')).sum()['discharge'])
print('\n\nGIS - R', (g.sum(dim='gate') - r.sum(dim='region')).sum()['discharge'])
print('\n\nGIS - S', (g.sum(dim='gate') - s.sum(dim='sector')).sum()['discharge'])
print('\n\nGIS - S different', (g.groupby('sector').sum() - s).sum(dim='sector').sum()['discharge'])
print('\n\nGIS - R different', (g.groupby('region').sum() - r).sum(dim='region').sum()['discharge'])
print('\nS & R from gates', (g.groupby('sector').sum().sum(dim='sector') - g.groupby('region').sum().sum(dim='region')).sum()['discharge'])
S - R <xarray.DataArray 'discharge' ()> array(0.21765137, dtype=float32) GIS - R <xarray.DataArray 'discharge' ()> array(-0.04473877, dtype=float32) GIS - S <xarray.DataArray 'discharge' ()> array(-0.26239014, dtype=float32) GIS - S different <xarray.DataArray 'discharge' ()> array(0., dtype=float32) GIS - R different <xarray.DataArray 'discharge' ()> array(-0.0474062, dtype=float32) S & R from gates <xarray.DataArray 'discharge' ()> array(-0.0039978, dtype=float32)
import xarray as xr
g = xr.open_dataset('./out/gate.nc')
r = xr.open_dataset('./out/region.nc')
s = xr.open_dataset('./out/sector.nc')
print('S - R', (s.sum(dim='sector') - r.sum(dim='region')).sum()['discharge'])
print('S - R', (s.sum(dim='sector').sum() - r.sum(dim='region').sum())['discharge'])
print('\n\nGIS - R', (g.sum(dim='gate') - r.sum(dim='region')).sum()['discharge'])
print('\n\nGIS - S', (g.sum(dim='gate') - s.sum(dim='sector')).sum()['discharge'])
print('\n\nGIS - S different', (g.groupby('sector').sum() - s).sum(dim='sector').sum()['discharge'])
print('\n\nGIS - R different', (g.groupby('region').sum() - r).sum(dim='region').sum()['discharge'])
print('\nS & R from gates', (g.groupby('sector').sum().sum(dim='sector') - g.groupby('region').sum().sum(dim='region')).sum()['discharge'])
S - R <xarray.DataArray 'discharge' ()> array(-1.13686838e-12) S - R <xarray.DataArray 'discharge' ()> array(-2.91038305e-11) GIS - R <xarray.DataArray 'discharge' ()> array(-0.00085955) GIS - S <xarray.DataArray 'discharge' ()> array(-0.00085955) GIS - S different <xarray.DataArray 'discharge' ()> array(0.) GIS - R different <xarray.DataArray 'discharge' ()> array(-0.00233202) S & R from gates <xarray.DataArray 'discharge' ()> array(0.00628662, dtype=float32)
# git lg | head
git diff sector_region_disagree main
# git diff HEAD
R = pd.read_csv(‘./out/gate_meta.csv’) meta[‘name’] = ” for g in meta[‘gates’].unique(): meta.loc[meta[‘gates’] == g, ‘name’] = R[R[‘gate’] == g][‘Mouginot_2019’].values
-# from IPython import embed; embed()
-### GEUS-Glaciology-and-Climate/ice_discharge#28 -### Gates span sectors and regions. Assign to their primary sector or region -### meta.groupby(‘gates’).mean()[‘sectors’].values -### meta[meta[‘gates’] == 239].sectors -### Don’t seem to span regions (run same code but do it above before “map(regions.get)” -for g in meta[‘gates’].unique():
- meta.loc[meta[‘gates’] == g, ‘sectors’] = meta[meta[‘gates’] == g][‘sectors’].mode()
- # meta.loc[meta[‘gates’] == g, ‘regions’] = meta[meta[‘gates’] == g][‘regions’].mode()
- ### ### Load BASELINE velocity ### @@ -3796,9 +3783,9 @@ D_gatesT.index.name = “Date” D_gates_errT.index.name = “Date” D_gates_fill_weightT.index.name = “Date”
-D_gatesT.to_csv(‘./out/gate_D.csv’) -D_gates_errT.to_csv(‘./out/gate_err.csv’) -D_gates_fill_weightT.to_csv(‘./out/gate_coverage.csv’) +D_gatesT.to_csv(‘./out/gate_D.csv’, float_format=’%.3f’) +D_gates_errT.to_csv(‘./out/gate_err.csv’, float_format=’%.3f’) +D_gates_fill_weightT.to_csv(‘./out/gate_coverage.csv’, float_format=’%.3f’)
**** Sectors @@ -3818,9 +3805,9 @@ D_sectors_errT.index.name = “Date” D_sectors_fill_weightT.index.name = “Date”
-D_sectorsT.to_csv(‘./out/sector_D.csv’) -D_sectors_errT.to_csv(‘./out/sector_err.csv’) -D_sectors_fill_weightT.to_csv(‘./out/sector_coverage.csv’) +D_sectorsT.to_csv(‘./out/sector_D.csv’, float_format=’%.3f’) +D_sectors_errT.to_csv(‘./out/sector_err.csv’, float_format=’%.3f’) +D_sectors_fill_weightT.to_csv(‘./out/sector_coverage.csv’, float_format=’%.3f’)
@@ -3838,9 +3825,9 @@ D_regions_errT.index.name = “Date” D_regions_fill_weightT.index.name = “Date”
-D_regionsT.to_csv(‘./out/region_D.csv’) -D_regions_errT.to_csv(‘./out/region_err.csv’) -D_regions_fill_weightT.to_csv(‘./out/region_coverage.csv’) +D_regionsT.to_csv(‘./out/region_D.csv’, float_format=’%.3f’) +D_regions_errT.to_csv(‘./out/region_err.csv’, float_format=’%.3f’) +D_regions_fill_weightT.to_csv(‘./out/region_coverage.csv’, float_format=’%.3f’)
@@ -3921,19 +3908,19 @@ ds[“time”].attrs[“standard_name”] = “time” ds[“time”].attrs[“axis”] = “T” ds[“time”].attrs[“cf_role”] = “timeseries_id”
-ds[“discharge”] = (“time”, df_D[‘Discharge [Gt yr-1]’]) +ds[“discharge”] = (“time”, df_D[‘Discharge [Gt yr-1]’].astype(np.float32)) ds[“discharge”].attrs[“long_name”] = “Discharge” ds[“discharge”].attrs[“standard_name”] = “land_ice_mass_tranport_due_to_calving_and_ice_front_melting” ds[“discharge”].attrs[“units”] = “Gt yr-1” ds[“discharge”].attrs[“coordinates”] = “time”
-ds[“err”] = (“time”, df_err[‘Discharge Error [Gt yr-1]’]) +ds[“err”] = (“time”, df_err[‘Discharge Error [Gt yr-1]’].astype(np.float32)) ds[“err”].attrs[“long_name”] = “Error” ds[“err”].attrs[“standard_name”] = “Uncertainty” ds[“err”].attrs[“units”] = “Gt yr-1” ds[“err”].attrs[“coordinates”] = “time”
-ds[“coverage”] = (“time”, df_coverage[‘Coverage [unit]’]) +ds[“coverage”] = (“time”, df_coverage[‘Coverage [unit]’].astype(np.float32)) ds[“coverage”].attrs[“long_name”] = “Coverage” ds[“coverage”].attrs[“standard_name”] = “Coverage”
@@ -3992,19 +3979,19 @@ ds[“region”].attrs[“long_name”] = “Region” ds[“region”].attrs[“standard_name”] = “N/A” ds[“region”].attrs[“comment”] = “Regions from Mouginot (2019)”
-ds[“discharge”] = ((“region”, “time”), df_D.T.values) +ds[“discharge”] = ((“region”, “time”), df_D.T.values.astype(np.float32)) ds[“discharge”].attrs[“long_name”] = “Discharge” ds[“discharge”].attrs[“standard_name”] = “land_ice_mass_tranport_due_to_calving_and_ice_front_melting” ds[“discharge”].attrs[“units”] = “Gt yr-1” ds[“discharge”].attrs[“coordinates”] = “time region”
-ds[“err”] = ((“region”, “time”), df_err.T.values) +ds[“err”] = ((“region”, “time”), df_err.T.values.astype(np.float32)) ds[“err”].attrs[“long_name”] = “Error” ds[“err”].attrs[“standard_name”] = “Uncertainty” ds[“err”].attrs[“units”] = “Gt yr-1” ds[“err”].attrs[“coordinates”] = “time region”
-ds[“coverage”] = ((“region”, “time”), df_coverage.T.values) +ds[“coverage”] = ((“region”, “time”), df_coverage.T.values.astype(np.float32)) ds[“coverage”].attrs[“long_name”] = “Coverage” ds[“coverage”].attrs[“standard_name”] = “Coverage”
@@ -4084,19 +4071,19 @@ ds[“sector”].attrs[“long_name”] = “Sector” ds[“sector”].attrs[“standard_name”] = “N/A” ds[“sector”].attrs[“comment”] = “Sectors from Mouginot (2019)”
-ds[“discharge”] = ((“sector”, “time”), df_D.T.values) +ds[“discharge”] = ((“sector”, “time”), df_D.T.values.astype(np.float32)) ds[“discharge”].attrs[“long_name”] = “Discharge” ds[“discharge”].attrs[“standard_name”] = “land_ice_mass_tranport_due_to_calving_and_ice_front_melting” ds[“discharge”].attrs[“units”] = “Gt yr-1” ds[“discharge”].attrs[“coordinates”] = “time sector”
-ds[“err”] = ((“sector”, “time”), df_err.T.values) +ds[“err”] = ((“sector”, “time”), df_err.T.values.astype(np.float32)) ds[“err”].attrs[“long_name”] = “Error” ds[“err”].attrs[“standard_name”] = “Uncertainty” ds[“err”].attrs[“units”] = “Gt yr-1” ds[“err”].attrs[“coordinates”] = “time sector”
-ds[“coverage”] = ((“sector”, “time”), df_coverage.T.values) +ds[“coverage”] = ((“sector”, “time”), df_coverage.T.values.astype(np.float32)) ds[“coverage”].attrs[“long_name”] = “Coverage” ds[“coverage”].attrs[“standard_name”] = “Coverage”
diff –git a/raw2discharge.py b/raw2discharge.py index 6893f6f..5e74fe5 100755 — a/raw2discharge.py +++ b/raw2discharge.py @@ -17,24 +17,12 @@ meta.rename(inplace=True, columns={‘regions@Mouginot_2019’:’regions’, ‘gates_gateID@gates_100_5000’:’gates’}) regions = {1:’NO’, 2:’NE’, 3:’CE’, 4:’SE’, 5:’SW’, 6:’CW’, 7:’NW’} meta[‘regions’] = meta[‘regions’].map(regions.get) # Convert sector numbers to meaningful names -# UNDO with foo.replace({‘NO’:1,’NW’:2,’NE’:3,’CW’:4,’CE’:5,’SW’:6,’SE’:7}) meta[‘ones’] = 1
R = pd.read_csv(‘./out/gate_meta.csv’) meta[‘name’] = ” for g in meta[‘gates’].unique(): meta.loc[meta[‘gates’] == g, ‘name’] = R[R[‘gate’] == g][‘Mouginot_2019’].values
-# from IPython import embed; embed()
-### GEUS-Glaciology-and-Climate/ice_discharge#28 -### Gates span sectors and regions. Assign to their primary sector or region -### meta.groupby(‘gates’).mean()[‘sectors’].values -### meta[meta[‘gates’] == 239].sectors -### Don’t seem to span regions (run same code but do it above before “map(regions.get)” -for g in meta[‘gates’].unique():
- meta.loc[meta[‘gates’] == g, ‘sectors’] = meta[meta[‘gates’] == g][‘sectors’].mode()
- # meta.loc[meta[‘gates’] == g, ‘regions’] = meta[meta[‘gates’] == g][‘regions’].mode()
- ### ### Load BASELINE velocity ### @@ -396,9 +384,9 @@ D_gatesT.index.name = “Date” D_gates_errT.index.name = “Date” D_gates_fill_weightT.index.name = “Date”
-D_gatesT.to_csv(‘./out/gate_D.csv’) -D_gates_errT.to_csv(‘./out/gate_err.csv’) -D_gates_fill_weightT.to_csv(‘./out/gate_coverage.csv’) +D_gatesT.to_csv(‘./out/gate_D.csv’, float_format=’%.3f’) +D_gates_errT.to_csv(‘./out/gate_err.csv’, float_format=’%.3f’) +D_gates_fill_weightT.to_csv(‘./out/gate_coverage.csv’, float_format=’%.3f’)
@@ -415,9 +403,9 @@ D_sectors_errT.index.name = “Date” D_sectors_fill_weightT.index.name = “Date”
-D_sectorsT.to_csv(‘./out/sector_D.csv’) -D_sectors_errT.to_csv(‘./out/sector_err.csv’) -D_sectors_fill_weightT.to_csv(‘./out/sector_coverage.csv’) +D_sectorsT.to_csv(‘./out/sector_D.csv’, float_format=’%.3f’) +D_sectors_errT.to_csv(‘./out/sector_err.csv’, float_format=’%.3f’) +D_sectors_fill_weightT.to_csv(‘./out/sector_coverage.csv’, float_format=’%.3f’)
@@ -432,9 +420,9 @@ D_regions_errT.index.name = “Date” D_regions_fill_weightT.index.name = “Date”
-D_regionsT.to_csv(‘./out/region_D.csv’) -D_regions_errT.to_csv(‘./out/region_err.csv’) -D_regions_fill_weightT.to_csv(‘./out/region_coverage.csv’) +D_regionsT.to_csv(‘./out/region_D.csv’, float_format=’%.3f’) +D_regions_errT.to_csv(‘./out/region_err.csv’, float_format=’%.3f’) +D_regions_fill_weightT.to_csv(‘./out/region_coverage.csv’, float_format=’%.3f’)
diff –git a/vel_eff.sh b/vel_eff.sh index 6dee8db..9399d18 100755 — a/vel_eff.sh +++ b/vel_eff.sh @@ -38,7 +38,7 @@ function ctrl_c() {
-# Just one velocity cutoff & buffer distance:1 +# Just one velocity cutoff & buffer distance:1 g.mapsets -l
r.mask -r @@ -108,7 +108,7 @@ done
-# sentinel1_effective_velocity +# sentinel1_effective_velocity g.mapset Sentinel1 g.region -d r.mapcalc “MASK = if((gates_x@${MAPSET} == 1) | (gates_y@${MAPSET} == 1), 1, null())” –o @@ -124,7 +124,7 @@ for VX in $(g.list type=raster pattern=vx_????_??_??); do done
-# Just one velocity cutoff & buffer distance:3 +# Just one velocity cutoff & buffer distance:3
MSG_OK “vel_eff DONE”
Discharge error grows larger when interpolating, as done with TMB product.
g = xr.open_dataset('./out/gate.nc')
s = xr.open_dataset('./out/sector.nc')
r = xr.open_dataset('./out/region.nc')
s.sum().sum() - r.sum().sum()
# vs.
s.resample({'time':'1D'}).mean().interpolate_na(dim='time', method='pchip').sum().sum() - r.resample({'time':'1D'}).mean().interpolate_na(dim='time', method='pchip').sum().sum()
See GEUS-Glaciology-and-Climate/ice_discharge#28