Skip to content

Instantly share code, notes, and snippets.

@dmoliveira
Forked from ebressert/Anscombe's Quartet
Last active August 29, 2015 14:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dmoliveira/0d2c089eda65be47f6fe to your computer and use it in GitHub Desktop.
import seaborn as sns
from scipy.optimize import curve_fit
# Function for linear fit
def func(x, a, b):
    """Evaluate the straight line ``a + b * x``.

    This is the model function handed to ``curve_fit`` further down.

    Args:
        x: Independent variable (scalar or array-like supporting ``*``/``+``).
        a: Intercept of the line.
        b: Slope of the line.

    Returns:
        The value ``a + b * x``.
    """
    return a + b * x
# Anscombe's quartet is one of the example datasets that
# seaborn can load by name.
df = sns.load_dataset("anscombe")
# Distinct labels found in the "dataset" column.
dset = df["dataset"].unique()
# Computing basic stats and linear regression
# for all groups.
for data in dset:
    # Keep only the rows that belong to the current dataset label.
    tmp = df.loc[df.dataset == data]
    # Single-argument print(...) calls produce the same output on
    # Python 2 and Python 3.
    print('Data {0}'.format(data))
    print('Mean x: {0:.1f}'.format(tmp.x.mean()))
    print('Mean y: {0:.1f}'.format(tmp.y.mean()))
    print('Variance x: {0:.2f}'.format(tmp.x.var()))
    print('Variance y: {0:.2f}'.format(tmp.y.var()))
    print('Correlation between x and y: {0:.3f}'.format(tmp.x.corr(tmp.y)))
    # Fit y = a + b * x; popt holds the fitted (a, b), pcov their covariance.
    popt, pcov = curve_fit(func, tmp.x, tmp.y)
    print('Linear regression coefficients: y = {a:.2f} + {b:.2f}x'.format(
        a=popt[0], b=popt[1]))
    # Blank separator between groups.
    print('\n')
# Setting up params for graphic
sns.set(style="white")
sns.set_context("talk")
# NOTE(review): nothing has been plotted yet at this point, so this call
# presumably has no visible effect on the grid created below -- confirm
# and drop if so.
sns.despine(left=True)
# Plotting
# x and y are passed by keyword: this works on old seaborn releases and is
# required by newer ones, which no longer accept them positionally.
# NOTE(review): newer seaborn releases renamed `size` to `height`; switch
# the keyword if this script is run against such a release.
catcher = sns.lmplot(x="x", y="y", col="dataset", hue="dataset", data=df,
                     col_wrap=2, ci=None, palette="husl", size=4,
                     scatter_kws={"s": 50, "alpha": 0.7}, aspect=1.5)
sns.despine()
# Saving graphic
# No `plt` import is visible in this file, so save through the grid object
# returned by lmplot instead of `plt.savefig`.
# NOTE(review): the grid's savefig may apply a tight bounding box by
# default, so margins can differ slightly from plt.savefig -- confirm.
catcher.savefig('quartet.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment