karpanGit/linear regression groups.py

## linear regression groups.py
# linear regression per group

# create dataset
import pandas as pd
import numpy as np
# common signal shared by both groups; each group adds its own small gaussian noise
ns = np.random.randn(100)
# group 'a': y is roughly 2.5 * x - 1
df1 = pd.DataFrame({
    'x': ns + 0.05 * np.random.randn(100),
    'y': 2.5 * ns - 1 + 0.05 * np.random.randn(100),
})
# group 'b': y is roughly 1.5 * x + 1
df2 = pd.DataFrame({
    'x': ns + 0.05 * np.random.randn(100),
    'y': 1.5 * ns + 1 + 0.05 * np.random.randn(100),
})
# stack both frames, tag every row with its group label ('a' / 'b') and
# expose that label as a regular column named 'key'
df = (
    pd.concat([df1, df2], axis='index', keys=['a', 'b'])
    .droplevel(level=1)
    .reset_index()
    .rename(columns={'index': 'key'})
)
# the two groups have approximately the following (slope, intercept): (2.5, -1), (1.5, 1)

# apply linear regression using numpy
def linReg(x, y):
    '''Least-squares fit of the line y = slope * x + intercept.

    x and y are one-dimensional, equal-length arrays (or array-likes).
    Returns a pd.Series with the entries 'slope' and 'intercept'.
    '''
    # design matrix with one row per observation: [x_i, 1]
    design = np.vstack([x, np.ones(len(x))]).T
    # lstsq returns (solution, residuals, rank, singular values); only the solution is used
    solution = np.linalg.lstsq(design, y, rcond=None)
    slope, intercept = solution[0]
    return pd.Series({'slope': slope, 'intercept': intercept})
# fit one line per group. The columns are selected explicitly so the grouping
# column 'key' is not part of the frame handed to the callback; letting apply
# operate on the grouping column is deprecated in pandas >= 2.2 and emits a
# warning. The result is unchanged: one row per key with the columns
# 'slope' and 'intercept'.
res = df.groupby('key')[['x', 'y']].apply(lambda g: linReg(g['x'], g['y']))
print(res)
#         slope  intercept
# key
# a    2.503503  -0.999729
# b    1.503392   1.008813
	# linear regression per group

	# create dataset
	import pandas as pd
	import numpy as np
	# common signal shared by both groups; each group adds its own small gaussian noise
	ns = np.random.randn(100)
	# group 'a': y is roughly 2.5 * x - 1; group 'b': y is roughly 1.5 * x + 1
	df1 = pd.DataFrame({'x': ns + 0.05*np.random.randn(100), 'y':2.5*ns -1 + 0.05*np.random.randn(100)})
	df2 = pd.DataFrame({'x': ns + 0.05*np.random.randn(100), 'y':1.5*ns + 1 + 0.05*np.random.randn(100)})
	# stack both frames, tag every row with its group label and expose it as the 'key' column
	df = pd.concat([df1, df2], axis='index',keys=['a', 'b']).droplevel(level=1).reset_index().rename({'index':'key'}, axis='columns')
	# the two groups have approximately the following (slope, intercept): (2.5, -1), (1.5, 1)

	# apply linear regression using numpy
	def linReg(x, y):
		'''Least-squares fit of the line y = slope * x + intercept.

		x and y are one-dimensional, equal-length arrays (or array-likes).
		Returns a pd.Series with the entries 'slope' and 'intercept'.
		'''
		# design matrix with one row per observation: [x_i, 1]
		A = np.vstack([x, np.ones(len(x))]).T
		# lstsq returns (solution, residuals, rank, singular values); only the solution is used
		slope, intercept = np.linalg.lstsq(A, y, rcond=None)[0]
		return pd.Series({'slope':slope, 'intercept': intercept})
	# fit one line per group. The columns are selected explicitly so the grouping
	# column 'key' is not part of the frame handed to the callback; letting apply
	# operate on the grouping column is deprecated in pandas >= 2.2 and emits a
	# warning. The result is unchanged: one row per key with the columns
	# 'slope' and 'intercept'.
	res = df.groupby('key')[['x', 'y']].apply(lambda g: linReg(g['x'], g['y']))
	print(res)
	# slope intercept
	# key
	# a 2.503503 -0.999729
	# b 1.503392 1.008813