Skip to content

Instantly share code, notes, and snippets.

@mankoff
Last active November 11, 2021 14:54
Show Gist options
  • Save mankoff/4e4ae109824e1a4a72659fad025e24ea to your computer and use it in GitHub Desktop.
Save mankoff/4e4ae109824e1a4a72659fad025e24ea to your computer and use it in GitHub Desktop.
Ice discharge disagreement betwene M2019 and Z2012

Introduction

Discharge grouped by Mouginot 2019 regions or Zwally 2012 sectors does not agree (discovered in TMB product)

Here, reproduce the issue and fix the bug.

Note that in this product, sectors and regions both come from Mouginot 2019, while in the TMB product, sectors come from Zwally 2012.

Reproduce

wget "https://dataverse01.geus.dk/api/access/datafile/:persistentId?persistentId=doi:10.22008/promice/data/ice_discharge/d/v02/DFDD7I" -O ~/tmp/gates.nc
wget 'https://dataverse01.geus.dk/api/access/datafile/:persistentId?persistentId=doi:10.22008/promice/data/ice_discharge/d/v02/UC6DJM' -O ~/tmp/regions.nc
wget 'https://dataverse01.geus.dk/api/access/datafile/:persistentId?persistentId=doi:10.22008/promice/data/ice_discharge/d/v02/ZFL44G' -O ~/tmp/sectors.nc
import xarray as xr

g = xr.open_dataset('~/tmp/gates.nc')
r = xr.open_dataset('~/tmp/regions.nc')
s = xr.open_dataset('~/tmp/sectors.nc')

print('S - R', (s.sum(dim='sector') - r.sum(dim='region')).sum()['discharge'])
print('\n\nGIS - R', (g.sum(dim='gate') - r.sum(dim='region')).sum()['discharge'])
print('\n\nGIS - S', (g.sum(dim='gate') - s.sum(dim='sector')).sum()['discharge'])
print('\n\nGIS - S different', (g.groupby('sector').sum() - s).sum(dim='sector').sum()['discharge'])
print('\n\nGIS - R different', (g.groupby('region').sum() - r).sum(dim='region').sum()['discharge'])

print('\nS & R from gates', (g.groupby('sector').sum().sum(dim='sector') - g.groupby('region').sum().sum(dim='region')).sum()['discharge'])

S - R <xarray.DataArray 'discharge' ()>
array(0.21765137, dtype=float32)


GIS - R <xarray.DataArray 'discharge' ()>
array(-0.04473877, dtype=float32)


GIS - S <xarray.DataArray 'discharge' ()>
array(-0.26239014, dtype=float32)


GIS - S different <xarray.DataArray 'discharge' ()>
array(0., dtype=float32)


GIS - R different <xarray.DataArray 'discharge' ()>
array(-0.0474062, dtype=float32)

S & R from gates <xarray.DataArray 'discharge' ()>
array(-0.0039978, dtype=float32)

Improve

Results of changes

import xarray as xr

g = xr.open_dataset('./out/gate.nc')
r = xr.open_dataset('./out/region.nc')
s = xr.open_dataset('./out/sector.nc')

print('S - R', (s.sum(dim='sector') - r.sum(dim='region')).sum()['discharge'])
print('S - R', (s.sum(dim='sector').sum() - r.sum(dim='region').sum())['discharge'])
print('\n\nGIS - R', (g.sum(dim='gate') - r.sum(dim='region')).sum()['discharge'])
print('\n\nGIS - S', (g.sum(dim='gate') - s.sum(dim='sector')).sum()['discharge'])
print('\n\nGIS - S different', (g.groupby('sector').sum() - s).sum(dim='sector').sum()['discharge'])
print('\n\nGIS - R different', (g.groupby('region').sum() - r).sum(dim='region').sum()['discharge'])

print('\nS & R from gates', (g.groupby('sector').sum().sum(dim='sector') - g.groupby('region').sum().sum(dim='region')).sum()['discharge'])

S - R <xarray.DataArray 'discharge' ()>
array(-1.13686838e-12)
S - R <xarray.DataArray 'discharge' ()>
array(-2.91038305e-11)


GIS - R <xarray.DataArray 'discharge' ()>
array(-0.00085955)


GIS - S <xarray.DataArray 'discharge' ()>
array(-0.00085955)


GIS - S different <xarray.DataArray 'discharge' ()>
array(0.)


GIS - R different <xarray.DataArray 'discharge' ()>
array(-0.00233202)

S & R from gates <xarray.DataArray 'discharge' ()>
array(0.00628662, dtype=float32)

Diff of changes

# git lg | head
git diff sector_region_disagree main
# git diff HEAD

R = pd.read_csv(‘./out/gate_meta.csv’) meta[‘name’] = ” for g in meta[‘gates’].unique(): meta.loc[meta[‘gates’] == g, ‘name’] = R[R[‘gate’] == g][‘Mouginot_2019’].values

-# from IPython import embed; embed()

-### GEUS-Glaciology-and-Climate/ice_discharge#28 -### Gates span sectors and regions. Assign to their primary sector or region -### meta.groupby(‘gates’).mean()[‘sectors’].values -### meta[meta[‘gates’] == 239].sectors -### Don’t seem to span regions (run same code but do it above before “map(regions.get)” -for g in meta[‘gates’].unique():

  • meta.loc[meta[‘gates’] == g, ‘sectors’] = meta[meta[‘gates’] == g][‘sectors’].mode()
  • # meta.loc[meta[‘gates’] == g, ‘regions’] = meta[meta[‘gates’] == g][‘regions’].mode()

- ### ### Load BASELINE velocity ### @@ -3796,9 +3783,9 @@ D_gatesT.index.name = “Date” D_gates_errT.index.name = “Date” D_gates_fill_weightT.index.name = “Date”

-D_gatesT.to_csv(‘./out/gate_D.csv’) -D_gates_errT.to_csv(‘./out/gate_err.csv’) -D_gates_fill_weightT.to_csv(‘./out/gate_coverage.csv’) +D_gatesT.to_csv(‘./out/gate_D.csv’, float_format=’%.3f’) +D_gates_errT.to_csv(‘./out/gate_err.csv’, float_format=’%.3f’) +D_gates_fill_weightT.to_csv(‘./out/gate_coverage.csv’, float_format=’%.3f’)

**** Sectors @@ -3818,9 +3805,9 @@ D_sectors_errT.index.name = “Date” D_sectors_fill_weightT.index.name = “Date”

-D_sectorsT.to_csv(‘./out/sector_D.csv’) -D_sectors_errT.to_csv(‘./out/sector_err.csv’) -D_sectors_fill_weightT.to_csv(‘./out/sector_coverage.csv’) +D_sectorsT.to_csv(‘./out/sector_D.csv’, float_format=’%.3f’) +D_sectors_errT.to_csv(‘./out/sector_err.csv’, float_format=’%.3f’) +D_sectors_fill_weightT.to_csv(‘./out/sector_coverage.csv’, float_format=’%.3f’)

@@ -3838,9 +3825,9 @@ D_regions_errT.index.name = “Date” D_regions_fill_weightT.index.name = “Date”

-D_regionsT.to_csv(‘./out/region_D.csv’) -D_regions_errT.to_csv(‘./out/region_err.csv’) -D_regions_fill_weightT.to_csv(‘./out/region_coverage.csv’) +D_regionsT.to_csv(‘./out/region_D.csv’, float_format=’%.3f’) +D_regions_errT.to_csv(‘./out/region_err.csv’, float_format=’%.3f’) +D_regions_fill_weightT.to_csv(‘./out/region_coverage.csv’, float_format=’%.3f’)

@@ -3921,19 +3908,19 @@ ds[“time”].attrs[“standard_name”] = “time” ds[“time”].attrs[“axis”] = “T” ds[“time”].attrs[“cf_role”] = “timeseries_id”

-ds[“discharge”] = (“time”, df_D[‘Discharge [Gt yr-1]’]) +ds[“discharge”] = (“time”, df_D[‘Discharge [Gt yr-1]’].astype(np.float32)) ds[“discharge”].attrs[“long_name”] = “Discharge” ds[“discharge”].attrs[“standard_name”] = “land_ice_mass_tranport_due_to_calving_and_ice_front_melting” ds[“discharge”].attrs[“units”] = “Gt yr-1” ds[“discharge”].attrs[“coordinates”] = “time”

-ds[“err”] = (“time”, df_err[‘Discharge Error [Gt yr-1]’]) +ds[“err”] = (“time”, df_err[‘Discharge Error [Gt yr-1]’].astype(np.float32)) ds[“err”].attrs[“long_name”] = “Error” ds[“err”].attrs[“standard_name”] = “Uncertainty” ds[“err”].attrs[“units”] = “Gt yr-1” ds[“err”].attrs[“coordinates”] = “time”

-ds[“coverage”] = (“time”, df_coverage[‘Coverage [unit]’]) +ds[“coverage”] = (“time”, df_coverage[‘Coverage [unit]’].astype(np.float32)) ds[“coverage”].attrs[“long_name”] = “Coverage” ds[“coverage”].attrs[“standard_name”] = “Coverage”

@@ -3992,19 +3979,19 @@ ds[“region”].attrs[“long_name”] = “Region” ds[“region”].attrs[“standard_name”] = “N/A” ds[“region”].attrs[“comment”] = “Regions from Mouginot (2019)”

-ds[“discharge”] = ((“region”, “time”), df_D.T.values) +ds[“discharge”] = ((“region”, “time”), df_D.T.values.astype(np.float32)) ds[“discharge”].attrs[“long_name”] = “Discharge” ds[“discharge”].attrs[“standard_name”] = “land_ice_mass_tranport_due_to_calving_and_ice_front_melting” ds[“discharge”].attrs[“units”] = “Gt yr-1” ds[“discharge”].attrs[“coordinates”] = “time region”

-ds[“err”] = ((“region”, “time”), df_err.T.values) +ds[“err”] = ((“region”, “time”), df_err.T.values.astype(np.float32)) ds[“err”].attrs[“long_name”] = “Error” ds[“err”].attrs[“standard_name”] = “Uncertainty” ds[“err”].attrs[“units”] = “Gt yr-1” ds[“err”].attrs[“coordinates”] = “time region”

-ds[“coverage”] = ((“region”, “time”), df_coverage.T.values) +ds[“coverage”] = ((“region”, “time”), df_coverage.T.values.astype(np.float32)) ds[“coverage”].attrs[“long_name”] = “Coverage” ds[“coverage”].attrs[“standard_name”] = “Coverage”

@@ -4084,19 +4071,19 @@ ds[“sector”].attrs[“long_name”] = “Sector” ds[“sector”].attrs[“standard_name”] = “N/A” ds[“sector”].attrs[“comment”] = “Sectors from Mouginot (2019)”

-ds[“discharge”] = ((“sector”, “time”), df_D.T.values) +ds[“discharge”] = ((“sector”, “time”), df_D.T.values.astype(np.float32)) ds[“discharge”].attrs[“long_name”] = “Discharge” ds[“discharge”].attrs[“standard_name”] = “land_ice_mass_tranport_due_to_calving_and_ice_front_melting” ds[“discharge”].attrs[“units”] = “Gt yr-1” ds[“discharge”].attrs[“coordinates”] = “time sector”

-ds[“err”] = ((“sector”, “time”), df_err.T.values) +ds[“err”] = ((“sector”, “time”), df_err.T.values.astype(np.float32)) ds[“err”].attrs[“long_name”] = “Error” ds[“err”].attrs[“standard_name”] = “Uncertainty” ds[“err”].attrs[“units”] = “Gt yr-1” ds[“err”].attrs[“coordinates”] = “time sector”

-ds[“coverage”] = ((“sector”, “time”), df_coverage.T.values) +ds[“coverage”] = ((“sector”, “time”), df_coverage.T.values.astype(np.float32)) ds[“coverage”].attrs[“long_name”] = “Coverage” ds[“coverage”].attrs[“standard_name”] = “Coverage”

diff –git a/raw2discharge.py b/raw2discharge.py index 6893f6f..5e74fe5 100755 — a/raw2discharge.py +++ b/raw2discharge.py @@ -17,24 +17,12 @@ meta.rename(inplace=True, columns={‘regions@Mouginot_2019’:’regions’, ‘gates_gateID@gates_100_5000’:’gates’}) regions = {1:’NO’, 2:’NE’, 3:’CE’, 4:’SE’, 5:’SW’, 6:’CW’, 7:’NW’} meta[‘regions’] = meta[‘regions’].map(regions.get) # Convert sector numbers to meaningful names -# UNDO with foo.replace({‘NO’:1,’NW’:2,’NE’:3,’CW’:4,’CE’:5,’SW’:6,’SE’:7}) meta[‘ones’] = 1

R = pd.read_csv(‘./out/gate_meta.csv’) meta[‘name’] = ” for g in meta[‘gates’].unique(): meta.loc[meta[‘gates’] == g, ‘name’] = R[R[‘gate’] == g][‘Mouginot_2019’].values

-# from IPython import embed; embed()

-### GEUS-Glaciology-and-Climate/ice_discharge#28 -### Gates span sectors and regions. Assign to their primary sector or region -### meta.groupby(‘gates’).mean()[‘sectors’].values -### meta[meta[‘gates’] == 239].sectors -### Don’t seem to span regions (run same code but do it above before “map(regions.get)” -for g in meta[‘gates’].unique():

  • meta.loc[meta[‘gates’] == g, ‘sectors’] = meta[meta[‘gates’] == g][‘sectors’].mode()
  • # meta.loc[meta[‘gates’] == g, ‘regions’] = meta[meta[‘gates’] == g][‘regions’].mode()

- ### ### Load BASELINE velocity ### @@ -396,9 +384,9 @@ D_gatesT.index.name = “Date” D_gates_errT.index.name = “Date” D_gates_fill_weightT.index.name = “Date”

-D_gatesT.to_csv(‘./out/gate_D.csv’) -D_gates_errT.to_csv(‘./out/gate_err.csv’) -D_gates_fill_weightT.to_csv(‘./out/gate_coverage.csv’) +D_gatesT.to_csv(‘./out/gate_D.csv’, float_format=’%.3f’) +D_gates_errT.to_csv(‘./out/gate_err.csv’, float_format=’%.3f’) +D_gates_fill_weightT.to_csv(‘./out/gate_coverage.csv’, float_format=’%.3f’)

@@ -415,9 +403,9 @@ D_sectors_errT.index.name = “Date” D_sectors_fill_weightT.index.name = “Date”

-D_sectorsT.to_csv(‘./out/sector_D.csv’) -D_sectors_errT.to_csv(‘./out/sector_err.csv’) -D_sectors_fill_weightT.to_csv(‘./out/sector_coverage.csv’) +D_sectorsT.to_csv(‘./out/sector_D.csv’, float_format=’%.3f’) +D_sectors_errT.to_csv(‘./out/sector_err.csv’, float_format=’%.3f’) +D_sectors_fill_weightT.to_csv(‘./out/sector_coverage.csv’, float_format=’%.3f’)

@@ -432,9 +420,9 @@ D_regions_errT.index.name = “Date” D_regions_fill_weightT.index.name = “Date”

-D_regionsT.to_csv(‘./out/region_D.csv’) -D_regions_errT.to_csv(‘./out/region_err.csv’) -D_regions_fill_weightT.to_csv(‘./out/region_coverage.csv’) +D_regionsT.to_csv(‘./out/region_D.csv’, float_format=’%.3f’) +D_regions_errT.to_csv(‘./out/region_err.csv’, float_format=’%.3f’) +D_regions_fill_weightT.to_csv(‘./out/region_coverage.csv’, float_format=’%.3f’)

diff –git a/vel_eff.sh b/vel_eff.sh index 6dee8db..9399d18 100755 — a/vel_eff.sh +++ b/vel_eff.sh @@ -38,7 +38,7 @@ function ctrl_c() {

-# Just one velocity cutoff & buffer distance:1 +# Just one velocity cutoff & buffer distance:1 g.mapsets -l

r.mask -r @@ -108,7 +108,7 @@ done

-# sentinel1_effective_velocity +# sentinel1_effective_velocity g.mapset Sentinel1 g.region -d r.mapcalc “MASK = if((gates_x@${MAPSET} == 1) | (gates_y@${MAPSET} == 1), 1, null())” –o @@ -124,7 +124,7 @@ for VX in $(g.list type=raster pattern=vx_????_??_??); do done

-# Just one velocity cutoff & buffer distance:3 +# Just one velocity cutoff & buffer distance:3

MSG_OK “vel_eff DONE”

Notes

Discharge error grows larger when interpolating, as done with TMB product.

g = xr.open_dataset('./out/gate.nc')
s = xr.open_dataset('./out/sector.nc')
r = xr.open_dataset('./out/region.nc')

s.sum().sum() - r.sum().sum()

# vs.

s.resample({'time':'1D'}).mean().interpolate_na(dim='time', method='pchip').sum().sum() - r.resample({'time':'1D'}).mean().interpolate_na(dim='time', method='pchip').sum().sum() 
@mankoff
Copy link
Author

mankoff commented Nov 10, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment