Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
import seaborn as sns
from scipy.optimize import curve_fit
# Function for linear fit
def func(x, a, b):
    """Evaluate the straight line ``a + b * x``.

    This is the model function handed to ``curve_fit`` for the linear fit.

    Args:
        x: Independent variable; anything supporting ``*`` and ``+`` with
            the coefficients (a scalar, or an array-like such as a Series).
        a: Intercept of the line.
        b: Slope of the line.

    Returns:
        The value of ``a + b * x``.
    """
    return a + b * x
# Seaborn conveniently provides the data for
# Anscombe's quartet.
df = sns.load_dataset("anscombe")
dset = df.dataset.unique()

# Compute basic statistics and a linear regression for every group.
# print() is always called with a single argument so the script produces
# the same output under both Python 2 and Python 3.
for data in dset:
    # Rows belonging to the current dataset only.
    tmp = df.loc[df.dataset == data]
    print('Data {0}'.format(data))
    print('Mean x: {0:.1f}'.format(tmp.x.mean()))
    print('Mean y: {0:.1f}'.format(tmp.y.mean()))
    print('Variance x: {0:.2f}'.format(tmp.x.var()))
    print('Variance y: {0:.2f}'.format(tmp.y.var()))
    print('Correlation between x and y: {0:.3f}'.format(tmp.x.corr(tmp.y)))
    # popt holds the fitted (a, b) of func; pcov is their covariance
    # estimate and is not used further here.
    popt, pcov = curve_fit(func, tmp.x, tmp.y)
    print('Linear regression coefficients: y = {a:.2f} + {b:.2f}x'.format(a=popt[0], b=popt[1]))
    # Blank separator between groups (the literal newline plus print's own).
    print('\n')
# Setting up params for graphic
# Plotting: one facet per dataset, wrapped two per row, each with its
# scatter points and fitted regression line (ci=None disables the
# confidence band).
# x/y are passed by keyword and the facet size as ``height``: current
# seaborn releases no longer accept positional x/y or the old ``size``
# keyword (renamed to ``height`` in seaborn 0.9).
catcher = sns.lmplot(
    x="x",
    y="y",
    col="dataset",
    hue="dataset",
    data=df,
    col_wrap=2,
    ci=None,
    palette="husl",
    height=4,
    scatter_kws={"s": 50, "alpha": 0.7},
    aspect=1.5,
)
# Saving graphic
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment