nocollier/ztest.py

## ztest.py
"""ESGF could consider maintaining a database of mean and standard deivations for each
variable in each experiment/table. There should be a public API to this so tools could
be created to test model data that is to be submitted. For example, a z-statistic could
be used:

z = | (submission_mean - esgf_mean) / sqrt(submission_std**2 + esgf_std**2) |

If z < 2, then the distributions are to be considered similar. Perhaps there are better
statistical tests that could be performed if ESGF does the work of providing these
important scalars.
"""
import numpy as np

from intake_esgf import ESGFCatalog

# Define some constants
VARIABLE = "tas"
NUM_TIME = 24  # only the last 2 years to keep it fast

# Use intake-esgf to find some data across sources
cat = ESGFCatalog().search(
    experiment_id="historical", table_id="Amon", variable_id="tas"
)
cat.remove_ensembles()  # only take the 'first' ensemble, just to keep it small
dsd = cat.to_dataset_dict(add_measures=False)  # download into a big dictionary

# Now compute the mean and standard deviation of the variable in the last `NUM_TIME`
# time slices. This is what ESGF would provide already precomputed.
var_sum = 0.0
var_sum_squared = 0.0
n = 0
for key, ds in dsd.items():
    da = ds["tas"].isel(time=slice(-NUM_TIME, -1)).load()
    var_sum += da.sum()
    var_sum_squared += (da * da).sum()
    n += da.size
mean = var_sum / n
std = np.sqrt((var_sum_squared - var_sum * var_sum / n) / (n - 1))

# Now loop back through the models and randomly pick one to have values that are in the
# wrong units. This is just meant to simulate what could go wrong.
bad_one = np.random.randint(low=0, high=len(dsd) - 1)
for i, (key, ds) in enumerate(dsd.items()):
    da = ds["tas"].isel(time=slice(-NUM_TIME, -1)).load() - (i == bad_one) * 273.15
    mu = da.mean().values
    sigma = da.std().values
    z = np.abs((mu - mean) / np.sqrt(std * std + sigma * sigma))
    print(f"{key:>50} mean={mu:.2f} std={sigma:.2f} {z=:.2f}")

# All of the correct models have z ~ 0, but the wrong units z = 9. It is really easy to
# see a model that is anomalous. We could then highlight entries where z > threshold for
# the community to consider because they are significantly different than the rest.


"""
                        MRI.MRI-ESM2-0.r1i1p1f1.gn mean=279.04 std=21.10 z=0.01
                   MPI-M.MPI-ESM1-2-HR.r1i1p1f1.gn mean=279.19 std=21.37 z=0.00
                     MPI-M.ICON-ESM-LR.r1i1p1f1.gn mean=288.30 std=16.12 z=0.34
                        NCAR.CESM2-FV2.r1i1p1f1.gn mean=5.62 std=21.90 z=9.02  <----
                       SNU.SAM0-UNICON.r1i1p1f1.gn mean=276.58 std=22.81 z=0.09
                   MPI-M.MPI-ESM1-2-LR.r1i1p1f1.gn mean=278.98 std=20.77 z=0.01
                  NCAR.CESM2-WACCM-FV2.r1i1p1f1.gn mean=278.74 std=21.89 z=0.02
                      NCAR.CESM2-WACCM.r1i1p1f1.gn mean=278.84 std=21.54 z=0.01
                  NOAA-GFDL.GFDL-ESM4.r1i1p1f1.gr1 mean=278.21 std=21.85 z=0.03
  EC-Earth-Consortium.EC-Earth3-Veg-LR.r1i1p1f1.gr mean=279.48 std=19.98 z=0.01
                     AWI.AWI-CM-1-1-MR.r1i1p1f1.gn mean=279.11 std=21.24 z=0.00
                    AWI.AWI-ESM-1-1-LR.r1i1p1f1.gn mean=278.71 std=20.95 z=0.02
                       AS-RCEC.TaiESM1.r1i1p1f1.gn mean=278.25 std=22.45 z=0.03
                         CAS.FGOALS-g3.r1i1p1f1.gn mean=280.57 std=20.88 z=0.05
                      MIROC.MIROC-ES2H.r1i1p1f2.gn mean=279.72 std=20.64 z=0.02
                   CCCma.CanESM5-CanOE.r1i1p2f1.gn mean=279.23 std=21.61 z=0.00
                   NASA-GISS.GISS-E3-G.r1i1p1f1.gr mean=278.51 std=20.27 z=0.02
                  MOHC.HadGEM3-GC31-MM.r1i1p1f3.gn mean=279.09 std=21.21 z=0.00
                 E3SM-Project.E3SM-1-0.r1i1p1f1.gr mean=278.57 std=20.80 z=0.02
                        NCC.NorESM2-LM.r1i1p1f1.gn mean=278.81 std=21.58 z=0.01
 EC-Earth-Consortium.EC-Earth3-AerChem.r1i1p1f1.gr mean=279.77 std=19.76 z=0.02
         EC-Earth-Consortium.EC-Earth3.r1i1p1f1.gr mean=280.17 std=19.58 z=0.03
                 NASA-GISS.GISS-E2-1-G.r1i1p1f1.gn mean=278.33 std=22.36 z=0.03
                  FIO-QLNM.FIO-ESM-2-0.r1i1p1f1.gn mean=278.24 std=22.17 z=0.03
      EC-Earth-Consortium.EC-Earth3-CC.r1i1p1f1.gr mean=280.69 std=19.46 z=0.05
     EC-Earth-Consortium.EC-Earth3-Veg.r1i1p1f1.gr mean=279.86 std=19.82 z=0.02
                             THU.CIESM.r1i1p1f1.gr mean=279.91 std=21.24 z=0.02
               CNRM-CERFACS.CNRM-CM6-1.r1i1p1f2.gr mean=277.88 std=21.95 z=0.04
                     CMCC.CMCC-CM2-HR4.r1i1p1f1.gn mean=279.30 std=21.17 z=0.00
                 NASA-GISS.GISS-E2-2-H.r1i1p1f1.gn mean=277.12 std=22.08 z=0.07
              NASA-GISS.GISS-E2-1-G-CC.r1i1p1f1.gn mean=278.47 std=22.18 z=0.02
                            NCAR.CESM2.r1i1p1f1.gn mean=279.03 std=21.56 z=0.01
                   NOAA-GFDL.GFDL-CM4.r1i1p1f1.gr1 mean=277.83 std=21.53 z=0.05
                   NIMS-KMA.KACE-1-0-G.r1i1p1f1.gr mean=278.98 std=21.73 z=0.01
     HAMMOZ-Consortium.MPI-ESM-1-2-HAM.r1i1p1f1.gn mean=279.18 std=20.45 z=0.00
                        NCC.NorESM2-MM.r1i1p1f1.gn mean=278.47 std=21.80 z=0.03
                        INM.INM-CM5-0.r1i1p1f1.gr1 mean=278.23 std=21.56 z=0.03
              CNRM-CERFACS.CNRM-ESM2-1.r1i1p1f2.gr mean=278.85 std=21.38 z=0.01
                        INM.INM-CM4-8.r1i1p1f1.gr1 mean=278.03 std=22.02 z=0.04
                        CMCC.CMCC-ESM2.r1i1p1f1.gn mean=279.42 std=21.43 z=0.01
                     IPSL.IPSL-CM6A-LR.r1i1p1f1.gr mean=278.04 std=22.22 z=0.04
                 NASA-GISS.GISS-E2-1-H.r1i1p1f1.gn mean=278.94 std=21.59 z=0.01
                          BCC.BCC-ESM1.r1i1p1f1.gn mean=279.53 std=21.94 z=0.01
                  MOHC.HadGEM3-GC31-LL.r1i1p1f3.gn mean=278.70 std=21.84 z=0.02
                          MIROC.MIROC6.r1i1p1f1.gn mean=281.09 std=20.11 z=0.06
                      MIROC.MIROC-ES2L.r1i1p1f2.gn mean=281.24 std=19.00 z=0.07
                      MOHC.UKESM1-0-LL.r1i1p1f2.gn mean=278.18 std=22.26 z=0.03
                      KIOST.KIOST-ESM.r1i1p1f1.gr1 mean=278.18 std=20.73 z=0.04
                       BCC.BCC-CSM2-MR.r1i1p1f1.gn mean=279.86 std=21.15 z=0.02
"""
	"""ESGF could consider maintaining a database of mean and standard deivations for each
	variable in each experiment/table. There should be a public API to this so tools could
	be created to test model data that is to be submitted. For example, a z-statistic could
	be used:

	z = \| (submission_mean - esgf_mean) / sqrt(submission_std2 + esgf_std2) \|

	If z < 2, then the distributions are to be considered similar. Perhaps there are better
	statistical tests that could be performed if ESGF does the work of providing these
	important scalars.
	"""
	import numpy as np

	from intake_esgf import ESGFCatalog

	# Define some constants
	VARIABLE = "tas"
	NUM_TIME = 24 # only the last 2 years to keep it fast

	# Use intake-esgf to find some data across sources
	cat = ESGFCatalog().search(
	experiment_id="historical", table_id="Amon", variable_id="tas"
	)
	cat.remove_ensembles() # only take the 'first' ensemble, just to keep it small
	dsd = cat.to_dataset_dict(add_measures=False) # download into a big dictionary

	# Now compute the mean and standard deviation of the variable in the last `NUM_TIME`
	# time slices. This is what ESGF would provide already precomputed.
	var_sum = 0.0
	var_sum_squared = 0.0
	n = 0
	for key, ds in dsd.items():
	da = ds["tas"].isel(time=slice(-NUM_TIME, -1)).load()
	var_sum += da.sum()
	var_sum_squared += (da * da).sum()
	n += da.size
	mean = var_sum / n
	std = np.sqrt((var_sum_squared - var_sum * var_sum / n) / (n - 1))

	# Now loop back through the models and randomly pick one to have values that are in the
	# wrong units. This is just meant to simulate what could go wrong.
	bad_one = np.random.randint(low=0, high=len(dsd) - 1)
	for i, (key, ds) in enumerate(dsd.items()):
	da = ds["tas"].isel(time=slice(-NUM_TIME, -1)).load() - (i == bad_one) * 273.15
	mu = da.mean().values
	sigma = da.std().values
	z = np.abs((mu - mean) / np.sqrt(std * std + sigma * sigma))
	print(f"{key:>50} mean={mu:.2f} std={sigma:.2f} {z=:.2f}")

	# All of the correct models have z ~ 0, but the wrong units z = 9. It is really easy to
	# see a model that is anomalous. We could then highlight entries where z > threshold for
	# the community to consider because they are significantly different than the rest.


	"""
	MRI.MRI-ESM2-0.r1i1p1f1.gn mean=279.04 std=21.10 z=0.01
	MPI-M.MPI-ESM1-2-HR.r1i1p1f1.gn mean=279.19 std=21.37 z=0.00
	MPI-M.ICON-ESM-LR.r1i1p1f1.gn mean=288.30 std=16.12 z=0.34
	NCAR.CESM2-FV2.r1i1p1f1.gn mean=5.62 std=21.90 z=9.02 <----
	SNU.SAM0-UNICON.r1i1p1f1.gn mean=276.58 std=22.81 z=0.09
	MPI-M.MPI-ESM1-2-LR.r1i1p1f1.gn mean=278.98 std=20.77 z=0.01
	NCAR.CESM2-WACCM-FV2.r1i1p1f1.gn mean=278.74 std=21.89 z=0.02
	NCAR.CESM2-WACCM.r1i1p1f1.gn mean=278.84 std=21.54 z=0.01
	NOAA-GFDL.GFDL-ESM4.r1i1p1f1.gr1 mean=278.21 std=21.85 z=0.03
	EC-Earth-Consortium.EC-Earth3-Veg-LR.r1i1p1f1.gr mean=279.48 std=19.98 z=0.01
	AWI.AWI-CM-1-1-MR.r1i1p1f1.gn mean=279.11 std=21.24 z=0.00
	AWI.AWI-ESM-1-1-LR.r1i1p1f1.gn mean=278.71 std=20.95 z=0.02
	AS-RCEC.TaiESM1.r1i1p1f1.gn mean=278.25 std=22.45 z=0.03
	CAS.FGOALS-g3.r1i1p1f1.gn mean=280.57 std=20.88 z=0.05
	MIROC.MIROC-ES2H.r1i1p1f2.gn mean=279.72 std=20.64 z=0.02
	CCCma.CanESM5-CanOE.r1i1p2f1.gn mean=279.23 std=21.61 z=0.00
	NASA-GISS.GISS-E3-G.r1i1p1f1.gr mean=278.51 std=20.27 z=0.02
	MOHC.HadGEM3-GC31-MM.r1i1p1f3.gn mean=279.09 std=21.21 z=0.00
	E3SM-Project.E3SM-1-0.r1i1p1f1.gr mean=278.57 std=20.80 z=0.02
	NCC.NorESM2-LM.r1i1p1f1.gn mean=278.81 std=21.58 z=0.01
	EC-Earth-Consortium.EC-Earth3-AerChem.r1i1p1f1.gr mean=279.77 std=19.76 z=0.02
	EC-Earth-Consortium.EC-Earth3.r1i1p1f1.gr mean=280.17 std=19.58 z=0.03
	NASA-GISS.GISS-E2-1-G.r1i1p1f1.gn mean=278.33 std=22.36 z=0.03
	FIO-QLNM.FIO-ESM-2-0.r1i1p1f1.gn mean=278.24 std=22.17 z=0.03
	EC-Earth-Consortium.EC-Earth3-CC.r1i1p1f1.gr mean=280.69 std=19.46 z=0.05
	EC-Earth-Consortium.EC-Earth3-Veg.r1i1p1f1.gr mean=279.86 std=19.82 z=0.02
	THU.CIESM.r1i1p1f1.gr mean=279.91 std=21.24 z=0.02
	CNRM-CERFACS.CNRM-CM6-1.r1i1p1f2.gr mean=277.88 std=21.95 z=0.04
	CMCC.CMCC-CM2-HR4.r1i1p1f1.gn mean=279.30 std=21.17 z=0.00
	NASA-GISS.GISS-E2-2-H.r1i1p1f1.gn mean=277.12 std=22.08 z=0.07
	NASA-GISS.GISS-E2-1-G-CC.r1i1p1f1.gn mean=278.47 std=22.18 z=0.02
	NCAR.CESM2.r1i1p1f1.gn mean=279.03 std=21.56 z=0.01
	NOAA-GFDL.GFDL-CM4.r1i1p1f1.gr1 mean=277.83 std=21.53 z=0.05
	NIMS-KMA.KACE-1-0-G.r1i1p1f1.gr mean=278.98 std=21.73 z=0.01
	HAMMOZ-Consortium.MPI-ESM-1-2-HAM.r1i1p1f1.gn mean=279.18 std=20.45 z=0.00
	NCC.NorESM2-MM.r1i1p1f1.gn mean=278.47 std=21.80 z=0.03
	INM.INM-CM5-0.r1i1p1f1.gr1 mean=278.23 std=21.56 z=0.03
	CNRM-CERFACS.CNRM-ESM2-1.r1i1p1f2.gr mean=278.85 std=21.38 z=0.01
	INM.INM-CM4-8.r1i1p1f1.gr1 mean=278.03 std=22.02 z=0.04
	CMCC.CMCC-ESM2.r1i1p1f1.gn mean=279.42 std=21.43 z=0.01
	IPSL.IPSL-CM6A-LR.r1i1p1f1.gr mean=278.04 std=22.22 z=0.04
	NASA-GISS.GISS-E2-1-H.r1i1p1f1.gn mean=278.94 std=21.59 z=0.01
	BCC.BCC-ESM1.r1i1p1f1.gn mean=279.53 std=21.94 z=0.01
	MOHC.HadGEM3-GC31-LL.r1i1p1f3.gn mean=278.70 std=21.84 z=0.02
	MIROC.MIROC6.r1i1p1f1.gn mean=281.09 std=20.11 z=0.06
	MIROC.MIROC-ES2L.r1i1p1f2.gn mean=281.24 std=19.00 z=0.07
	MOHC.UKESM1-0-LL.r1i1p1f2.gn mean=278.18 std=22.26 z=0.03
	KIOST.KIOST-ESM.r1i1p1f1.gr1 mean=278.18 std=20.73 z=0.04
	BCC.BCC-CSM2-MR.r1i1p1f1.gn mean=279.86 std=21.15 z=0.02
	"""