Skip to content

Instantly share code, notes, and snippets.

@Barry1
Last active June 13, 2020 12:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Barry1/66863ba19253965ac3ae5b152586cf39 to your computer and use it in GitHub Desktop.
Save Barry1/66863ba19253965ac3ae5b152586cf39 to your computer and use it in GitHub Desktop.
Function for showing a graph with CI bands for the quality of a given model or of a linear regression model
import matplotlib.pyplot
from numpy import sqrt
import pandas
from scipy.special import erfinv
from typing import Union
def _prepare_workdata(data: pandas.DataFrame, x: str, y: str,
                      modell: Union[None, str]):
    """Return ``(wrkdata, modell)``: the rows sorted by *x* plus the model column name.

    If *modell* names a column, that column is used as the model prediction.
    Otherwise a least-squares line ``a * x + b`` is fitted from the covariance
    of *x* and *y* and stored in a new column called 'Lineare Regression'.
    """
    if modell:
        wrkdata = data[[x, y, modell]].sort_values(x).reset_index(drop=True)
        return wrkdata, modell
    covges = data[[x, y]].cov()
    slope = covges.loc[y, x] / covges.loc[x, x]
    intercept = data[y].mean() - slope * data[x].mean()
    modell = 'Lineare Regression'  # f'Lineare Regression {a!r}x+{b!r}'
    wrkdata = data[[x, y]].sort_values(x).reset_index(drop=True)
    wrkdata[modell] = slope * wrkdata[x] + intercept
    return wrkdata, modell


def dataqualityplot(
        data: pandas.DataFrame,
        x: str,
        y: str,
        modell: Union[None, str] = None,
        res: int = 50,
        ci: float = 0.95,
        scatter: bool = False,
        globalbands: bool = False,
        oldfill=False,
        **pltopts) -> None:
    """Plot grouped data, a model and a confidence band around the model.

    The rows are sorted by *x* and cut into about *res* consecutive groups.
    Group means of *y* and of the model are drawn as lines; the band is the
    model shifted by the mean error (``y - model``) plus/minus
    ``erfinv(ci) * sqrt(2)`` standard deviations of that error.

    Parameters
    ----------
    data : frame holding the columns named by *x*, *y* and (optionally) *modell*.
    x, y : column names of the independent and the observed variable.
    modell : column name with model predictions; if falsy, a linear
        regression of *y* on *x* is fitted and used instead.
    res : number of groups the sorted rows are divided into.
    ci : confidence level as a fraction in ``[0, 1)`` (not a percentage).
    scatter : also draw the raw data points.
    globalbands : use one error mean/std over all rows instead of per group.
    oldfill : draw the band as a 'mid' step over the group means instead of
        spanning each group from its first to its last row.
    **pltopts : forwarded to ``matplotlib.pyplot.figure``.

    Raises
    ------
    ValueError : if *ci* is not within ``[0, 1)``.
    KeyError : if a requested column is missing from *data*.
    """
    # erfinv yields inf for 1 and nan beyond it, which would silently produce
    # an empty or garbage band (easy to hit by passing a percentage like 95).
    if not 0 <= ci < 1:
        raise ValueError(
            f'ci must be a fraction in [0, 1), got {ci!r}')
    # Two-sided normal quantile: P(|Z| < sigmafactor) == ci.
    sigmafactor = erfinv(ci) * sqrt(2)

    wrkdata, modell = _prepare_workdata(data, x, y, modell)
    # Sanity check on the sort (Series.is_monotonic was removed in pandas 2.0).
    assert wrkdata[x].is_monotonic_increasing
    wrkdata['Error'] = wrkdata[y] - wrkdata[modell]

    # Consecutive row positions are mapped onto group ids 0 .. res - 1.
    grpcol = res * wrkdata.index.to_series() // len(wrkdata)
    grpobj = wrkdata.groupby(by=grpcol)
    grpdata = grpobj.mean()

    matplotlib.pyplot.figure(**pltopts)
    ax = matplotlib.pyplot.gca()
    ax.plot(grpdata[x], grpdata[y], label=y + ' (gruppiert)')
    model_line = ax.plot(grpdata[x], grpdata[modell],
                         label=modell + ' (gruppiert)')[0]
    savecolor = model_line.get_color()  # band reuses the model line colour

    fehler: Union[float, pandas.Series]
    deltarange: Union[float, pandas.Series]
    if globalbands:
        fehler = wrkdata['Error'].mean()
        deltarange = wrkdata['Error'].std(ddof=0)
    else:
        fehler = grpdata['Error']
        deltarange = grpobj['Error'].std(ddof=0)
    halfwidth = sigmafactor * deltarange

    if oldfill:
        center = grpdata[modell] + fehler
        ax.fill_between(grpdata[x],
                        center - halfwidth,
                        center + halfwidth,
                        facecolor=savecolor,
                        step='mid',
                        alpha=.15)
    else:
        # One (first, last) pair per group; flattened row-wise this gives the
        # polygon nodes first0, last0, first1, last1, ...
        xedges = grpobj[x].agg(['first', 'last'])
        center = grpobj[modell].agg(['first', 'last']).add(
            fehler, axis='index')
        # No constant padding: the limits are exactly centre -/+ halfwidth.
        lower = center.sub(halfwidth, axis='index')
        upper = center.add(halfwidth, axis='index')
        ax.fill_between(xedges.to_numpy().ravel(),
                        lower.to_numpy().ravel(),
                        upper.to_numpy().ravel(),
                        facecolor=savecolor,
                        alpha=.15)

    if scatter:
        ax.plot(wrkdata[x], wrkdata[y], '.', label=y)
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.legend()
    ax.set_title(f'Daten, Modell und {100*ci} % Bestimmtheit')
if __name__ == "__main__":
    # Self-test / usage example. seaborn and numpy are only needed for this
    # demo, so they are imported here rather than at module level.
    import numpy
    import seaborn
    print("Selbsttest bzw. Anwendungsbeispiel")
    tips = seaborn.load_dataset("tips")
    # Reference plots from seaborn (its ci arguments are given in percent):
    # a plain regression band, then the same regression with binned x values.
    seaborn.regplot(x="total_bill", y="tip", data=tips, scatter=False, ci=60)
    seaborn.regplot(x="total_bill", y="tip", data=tips,
                    x_bins=numpy.linspace(0, 50, 10), x_ci=60, ci=60)
    # dataqualityplot opens its own figure; figsize is forwarded to it.
    dataqualityplot(data=tips, x="total_bill", y="tip",
                    res=10, globalbands=True, figsize=(10, 5))
    # Without show() a plain script run would exit without displaying anything.
    matplotlib.pyplot.show()
#https://holoviews.org/reference/elements/bokeh/Spread.html
#hv.extension('bokeh')
#hv.help(hv.Spread)
#xs = np.linspace(0, np.pi*2, 20)
# spread = hv.Spread((xs, np.sin(xs), 0.1+np.random.rand(len(xs)), 0.1+np.random.rand(len(xs))),
# vdims=['y', 'yerrneg', 'yerrpos'])
#spread.opts(fill_alpha=1, fill_color='indianred')
#http://holoviews.org/getting_started/Tabular_Datasets.html ErrorBars
# symmetric vs.
#class Spread(ErrorBars):
#https://holoviews.org/reference/elements/matplotlib/Area.html
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment