barronh/cmaq_taylor_example.py

## cmaq_taylor_example.py
"""
This example calculates statistics for a single day from the EPA EQUATES CMAQ simulation for 2018-06-14
and creates a taylor diagram.
"""


import pyrsig
import pandas as pd
import pyproj
import taylordiagram as td

api = pyrsig.RsigApi(bdate='2018-06-14T00Z', edate='2018-06-14T23:59:59Z', bbox=(-140, 0, -50, 70))
qds = api.to_ioapi('cmaq.equates.conus.aconc.O3')
proj = pyproj.Proj(qds.attrs['crs_proj4'])
adf = api.to_dataframe('airnow.ozone', unit_keys=False, parse_dates=True)
adf['COL'], adf['ROW'] = proj(adf['LONGITUDE'], adf['LATITUDE'])
qatobs = qds['O3'].isel(LAY=0).sel(
    TSTEP=adf['time'].dt.tz_convert(None).to_xarray(),
    COL=adf['COL'].to_xarray(), ROW=adf['ROW'].to_xarray(), method='nearest'
).values
adf['MODCOL'] = qds.COL.sel(COL=adf['COL'].to_xarray(), method='nearest')
adf['MODROW'] = qds.ROW.sel(ROW=adf['ROW'].to_xarray(), method='nearest')
adf['mod'] = qatobs
tdf = adf[['ozone', 'mod']].dropna()
sdf = tdf.describe().T
sdf.loc[:, 'r'] = tdf.corr()['ozone'].values
bias = tdf.subtract(tdf['ozone'], axis=0)
sdf.loc[:, 'mb'] = bias.mean()
sdf.loc[:, 'rmse'] = (bias**2).mean()**.5
ax = td.taylordiagram(sdf, refkey='ozone')
ax.figure.savefig('cmaq_equates_taylordiagram_2018-06-14.png')

## taylordiagram.py
__all__ = ['taylordiagram', 'test_taylordiagram']
__version__ = '1.0'

def taylordiagram(
    sdf, refkey='obs', r_levels=None, std_levels=None, crmse_levels=None,
    circlepoints=None, ax=None, **popts
):
    """
    Taylor Diagram (doi:10.1029/2000JD900719) which simultaneously shows
    correlation (r), standard deviation (std) and centered root mean square
    error (cRMSE). This provides multiple statistical evaluation on a single
    plot for synthesizing several elements of model performance.

    sdf : pandas.DataFrame
        Must contain refkey and all keys in popts in its index and r, crmse,
        and std in its columns. If crmse is not present, it will be calculated
        from rmse and mb.
    refkey : str
        Must exist in the index and will be used as the "truth" or observation.
    r_levels : array-like
        Correlation levels to mark and annotate
    std_levels : array-like
        Standard deviation levels to mark and annotate
    crmse_levels : array-like
        Centered RMSE levels to mark and annotate
    circlepoints : int
        Number of points to resolve in the cirlce defaults to 20k
    ax : matplotlib.axes.Axes
        axes to plot on. Defaults to fig, ax = plt.subplots(figsize=(6, 6))
    popts : dict
        Plotting options for
    """
    import matplotlib.pyplot as plt
    import numpy as np

    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 6))

    # obs std is used in several calculations

    ref_std = sdf.loc[refkey, 'std']
    max_std = sdf['std'].max()
    std_step = 10**np.int32(np.log10(max_std)) / 2
    if std_levels is None:
        std_levels = np.arange(std_step, max_std + std_step, std_step)
    std_levels = np.asarray(std_levels)
    rmax = std_levels.max() + std_step
    if (sdf['r'] > 0).all():
        rmin = 0
    else:
        rmin = -rmax

    if 'crmse' not in sdf.columns:
        sdf = sdf.copy()
        sdf['crmse'] = (sdf['rmse']**2 - sdf['mb']**2)**.5
    # Points of the arc from r=0 to r=1 in radians
    if circlepoints is None:
        circlepoints = 10001
    if rmin >= 0:
        xr = np.linspace(0, 1, circlepoints)
    else:
        xr = np.linspace(-1, 1, circlepoints)
    yr = np.sin(np.arccos(xr))
    for pkey in sdf.index:
        popts.setdefault(pkey, {})
    popts[refkey].setdefault('marker', 'x')
    popts[refkey].setdefault('color', 'k')
    popts[refkey].setdefault('s', 96)

    for k, popt in popts.items():
        r = sdf.loc[k, 'r']
        std = sdf.loc[k, 'std']
        popt.setdefault('label', k)
        ax.scatter(r * std, np.sin(np.arccos(r)) * std, zorder=3, **popt)

    # Add std deviation reference lines
    x = xr * ref_std
    y = yr * ref_std
    ax.plot(x, y, label='ref std', linestyle='--', color='k')

    if len(std_levels) > 0:
        ax.plot(xr[:, None] * std_levels, yr[:, None] * std_levels, 'k-', zorder=2);
        ax.lines[-1].set_label('STD')

    # Add correlation reference lines
    if r_levels is None:
        if rmin < 0:
            r_levels = np.linspace(-.9, .9, 20)
        else:
            r_levels = np.linspace(0, .9, 10)

    radius = std_levels.max()
    for rv in r_levels:
        x = rv * radius
        y = np.ma.masked_less(radius**2 - x**2, 0)**.5
        ax.plot([0, x], [0, y], color='cyan', zorder=1)
        ax.annotate(
            xy=(x * 1.05, y * 1.05), text=f'{rv:.1f}', color='cyan', horizontalalignment='center',
            bbox=dict(facecolor='w', edgecolor='w', alpha=0.8)
        )
    if len(r_levels) > 0:
        ax.lines[-1].set_label('R')

    # Add cRMSE reference lines
    if crmse_levels is None:
        crmse_max = sdf['crmse'].max()
        crmse_step = 10**np.int32(np.log10(sdf['crmse'].max())) / 2
        crmse_levels = np.arange(crmse_step, crmse_max + crmse_step, crmse_step)

    for rms in crmse_levels:
        x = xr * rmax
        y = np.ma.masked_less(rms**2 - (x - ref_std)**2, 0)**.5
        ax.plot(x, y, color='palegreen', zorder=0)
        xa = x[~y.mask].max()
        ya = y.compressed()[-1]
        ax.annotate(
            xy=(xa, ya), text=f'{rms:.1f}', color='palegreen',
            horizontalalignment='right', verticalalignment='bottom',
            bbox=dict(facecolor='w', edgecolor='w', alpha=0.8)
        )
    if len(crmse_levels) > 0:
        ax.lines[-1].set_label('cRMSE')

    ax.spines['top'].set(visible=False)
    ax.spines['right'].set(visible=False)

    ax.set(
        xlim=(rmin, rmax), ylim=(0, rmax), xlabel='Standard Deviation',
        ylabel='Standard Deviation'
    )
    ax.legend(ncol=3, bbox_to_anchor=(.5, .85), loc='lower center')
    return ax


def test_taylordiagram():
    """
    Returns
    -------
    ax : matplotlib.axes.Axes
        Axes object with Taylor diagram.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt

    # Create model data first.
    yhat = np.cos(np.linspace(-np.pi, np.pi))
    # Add noise to create a reference truth
    y = yhat + np.random.normal(0, 1, size=yhat.size)
    tdf = pd.DataFrame(dict(obs=y, mod=yhat))
    # Calculate stats
    bias = tdf.subtract(tdf['obs'], axis=0)
    sdf = pd.DataFrame(dict(
        std=tdf.std(), mb=bias.mean(),
        rmse=(bias**2).mean()**.5, r=tdf.corr()['obs']
    ))
    ax = taylordiagram(sdf)
    return ax


if __name__ == "__main__":
    ax = test_taylordiagram()
	"""
	This example calculates statistics for a single day from the EPA EQUATES CMAQ simulation for 2018-06-14
	and creates a taylor diagram.
	"""


	import pyrsig
	import pandas as pd
	import pyproj
	import taylordiagram as td

	api = pyrsig.RsigApi(bdate='2018-06-14T00Z', edate='2018-06-14T23:59:59Z', bbox=(-140, 0, -50, 70))
	qds = api.to_ioapi('cmaq.equates.conus.aconc.O3')
	proj = pyproj.Proj(qds.attrs['crs_proj4'])
	adf = api.to_dataframe('airnow.ozone', unit_keys=False, parse_dates=True)
	adf['COL'], adf['ROW'] = proj(adf['LONGITUDE'], adf['LATITUDE'])
	qatobs = qds['O3'].isel(LAY=0).sel(
	TSTEP=adf['time'].dt.tz_convert(None).to_xarray(),
	COL=adf['COL'].to_xarray(), ROW=adf['ROW'].to_xarray(), method='nearest'
	).values
	adf['MODCOL'] = qds.COL.sel(COL=adf['COL'].to_xarray(), method='nearest')
	adf['MODROW'] = qds.ROW.sel(ROW=adf['ROW'].to_xarray(), method='nearest')
	adf['mod'] = qatobs
	tdf = adf[['ozone', 'mod']].dropna()
	sdf = tdf.describe().T
	sdf.loc[:, 'r'] = tdf.corr()['ozone'].values
	bias = tdf.subtract(tdf['ozone'], axis=0)
	sdf.loc[:, 'mb'] = bias.mean()
	sdf.loc[:, 'rmse'] = (bias2).mean().5
	ax = td.taylordiagram(sdf, refkey='ozone')
	ax.figure.savefig('cmaq_equates_taylordiagram_2018-06-14.png')
	__all__ = ['taylordiagram', 'test_taylordiagram']
	__version__ = '1.0'

	def taylordiagram(
	sdf, refkey='obs', r_levels=None, std_levels=None, crmse_levels=None,
	circlepoints=None, ax=None, **popts
	):
	"""
	Taylor Diagram (doi:10.1029/2000JD900719) which simultaneously shows
	correlation (r), standard deviation (std) and centered root mean square
	error (cRMSE). This provides multiple statistical evaluation on a single
	plot for synthesizing several elements of model performance.

	sdf : pandas.DataFrame
	Must contain refkey and all keys in popts in its index and r, crmse,
	and std in its columns. If crmse is not present, it will be calculated
	from rmse and mb.
	refkey : str
	Must exist in the index and will be used as the "truth" or observation.
	r_levels : array-like
	Correlation levels to mark and annotate
	std_levels : array-like
	Standard deviation levels to mark and annotate
	crmse_levels : array-like
	Centered RMSE levels to mark and annotate
	circlepoints : int
	Number of points to resolve in the cirlce defaults to 20k
	ax : matplotlib.axes.Axes
	axes to plot on. Defaults to fig, ax = plt.subplots(figsize=(6, 6))
	popts : dict
	Plotting options for
	"""
	import matplotlib.pyplot as plt
	import numpy as np

	if ax is None:
	fig, ax = plt.subplots(figsize=(6, 6))

	# obs std is used in several calculations

	ref_std = sdf.loc[refkey, 'std']
	max_std = sdf['std'].max()
	std_step = 10**np.int32(np.log10(max_std)) / 2
	if std_levels is None:
	std_levels = np.arange(std_step, max_std + std_step, std_step)
	std_levels = np.asarray(std_levels)
	rmax = std_levels.max() + std_step
	if (sdf['r'] > 0).all():
	rmin = 0
	else:
	rmin = -rmax

	if 'crmse' not in sdf.columns:
	sdf = sdf.copy()
	sdf['crmse'] = (sdf['rmse']2 - sdf['mb']2)**.5
	# Points of the arc from r=0 to r=1 in radians
	if circlepoints is None:
	circlepoints = 10001
	if rmin >= 0:
	xr = np.linspace(0, 1, circlepoints)
	else:
	xr = np.linspace(-1, 1, circlepoints)
	yr = np.sin(np.arccos(xr))
	for pkey in sdf.index:
	popts.setdefault(pkey, {})
	popts[refkey].setdefault('marker', 'x')
	popts[refkey].setdefault('color', 'k')
	popts[refkey].setdefault('s', 96)

	for k, popt in popts.items():
	r = sdf.loc[k, 'r']
	std = sdf.loc[k, 'std']
	popt.setdefault('label', k)
	ax.scatter(r * std, np.sin(np.arccos(r)) * std, zorder=3, **popt)

	# Add std deviation reference lines
	x = xr * ref_std
	y = yr * ref_std
	ax.plot(x, y, label='ref std', linestyle='--', color='k')

	if len(std_levels) > 0:
	ax.plot(xr[:, None] * std_levels, yr[:, None] * std_levels, 'k-', zorder=2);
	ax.lines[-1].set_label('STD')

	# Add correlation reference lines
	if r_levels is None:
	if rmin < 0:
	r_levels = np.linspace(-.9, .9, 20)
	else:
	r_levels = np.linspace(0, .9, 10)

	radius = std_levels.max()
	for rv in r_levels:
	x = rv * radius
	y = np.ma.masked_less(radius2 - x2, 0)**.5
	ax.plot([0, x], [0, y], color='cyan', zorder=1)
	ax.annotate(
	xy=(x * 1.05, y * 1.05), text=f'{rv:.1f}', color='cyan', horizontalalignment='center',
	bbox=dict(facecolor='w', edgecolor='w', alpha=0.8)
	)
	if len(r_levels) > 0:
	ax.lines[-1].set_label('R')

	# Add cRMSE reference lines
	if crmse_levels is None:
	crmse_max = sdf['crmse'].max()
	crmse_step = 10**np.int32(np.log10(sdf['crmse'].max())) / 2
	crmse_levels = np.arange(crmse_step, crmse_max + crmse_step, crmse_step)

	for rms in crmse_levels:
	x = xr * rmax
	y = np.ma.masked_less(rms2 - (x - ref_std)2, 0)**.5
	ax.plot(x, y, color='palegreen', zorder=0)
	xa = x[~y.mask].max()
	ya = y.compressed()[-1]
	ax.annotate(
	xy=(xa, ya), text=f'{rms:.1f}', color='palegreen',
	horizontalalignment='right', verticalalignment='bottom',
	bbox=dict(facecolor='w', edgecolor='w', alpha=0.8)
	)
	if len(crmse_levels) > 0:
	ax.lines[-1].set_label('cRMSE')

	ax.spines['top'].set(visible=False)
	ax.spines['right'].set(visible=False)

	ax.set(
	xlim=(rmin, rmax), ylim=(0, rmax), xlabel='Standard Deviation',
	ylabel='Standard Deviation'
	)
	ax.legend(ncol=3, bbox_to_anchor=(.5, .85), loc='lower center')
	return ax


	def test_taylordiagram():
	"""
	Returns
	-------
	ax : matplotlib.axes.Axes
	Axes object with Taylor diagram.
	"""
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	# Create model data first.
	yhat = np.cos(np.linspace(-np.pi, np.pi))
	# Add noise to create a reference truth
	y = yhat + np.random.normal(0, 1, size=yhat.size)
	tdf = pd.DataFrame(dict(obs=y, mod=yhat))
	# Calculate stats
	bias = tdf.subtract(tdf['obs'], axis=0)
	sdf = pd.DataFrame(dict(
	std=tdf.std(), mb=bias.mean(),
	rmse=(bias2).mean().5, r=tdf.corr()['obs']
	))
	ax = taylordiagram(sdf)
	return ax


	if __name__ == "__main__":
	ax = test_taylordiagram()