Created
November 9, 2019 17:47
-
-
Save karpanGit/5ed258d1c0e866d824d64c76cbd618fa to your computer and use it in GitHub Desktop.
pandas: apply linear regression to groups
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# linear regression per group
# create dataset
import numpy as np
import pandas as pd

# Build a synthetic dataset with two groups that share the same underlying
# signal `ns` but follow different linear relationships between x and y.
# Small Gaussian noise (std 0.05) is added independently to x and y.
ns = np.random.randn(100)
df1 = pd.DataFrame({
    'x': ns + 0.05 * np.random.randn(100),
    'y': 2.5 * ns - 1 + 0.05 * np.random.randn(100),
})
df2 = pd.DataFrame({
    'x': ns + 0.05 * np.random.randn(100),
    'y': 1.5 * ns + 1 + 0.05 * np.random.randn(100),
})

# Stack the two frames, labelling their rows 'a' and 'b'. concat(keys=...)
# produces a two-level index; the inner (original row) level is dropped and
# the remaining label level is turned into a regular column named 'key'.
df = (
    pd.concat([df1, df2], axis='index', keys=['a', 'b'])
    .droplevel(level=1)
    .reset_index()
    .rename({'index': 'key'}, axis='columns')
)
# the two groups have approximately the following (slope, intercept): (2.5, -1), (1.5, 1)
# apply linear regression using numpy
def linReg(x, y):
    """Fit the line ``y = slope * x + intercept`` by least squares.

    Parameters
    ----------
    x, y
        One-dimensional sequences of equal length (e.g. numpy arrays,
        pandas Series or lists) holding the predictor and response values.

    Returns
    -------
    pandas.Series
        A Series with two entries, ``'slope'`` and ``'intercept'``, in
        that order.
    """
    # Design matrix with one row per observation: the first column holds the
    # x values, the second a constant 1 so the solver also fits an intercept.
    A = np.vstack([x, np.ones(len(x))]).T
    # lstsq returns (solution, residuals, rank, singular values); only the
    # solution vector [slope, intercept] is needed here.
    slope, intercept = np.linalg.lstsq(A, y, rcond=None)[0]
    return pd.Series({'slope': slope, 'intercept': intercept})
# Fit one regression per group. The 'x' and 'y' columns are selected
# explicitly so the grouping column is not handed to the callback; recent
# pandas versions emit a deprecation warning when apply() operates on it.
res = df.groupby('key')[['x', 'y']].apply(lambda g: linReg(g['x'], g['y']))
print(res)
# Example output (exact values differ from run to run because the data is
# drawn from np.random without a fixed seed):
#         slope  intercept
# key
# a    2.503503  -0.999729
# b    1.503392   1.008813
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
thanks!