rileypeterson/Expanding Linear Regression

## Expanding Linear Regression
# UPDATE: He re-added the question here: https://stackoverflow.com/questions/52048919/how-to-incrementally-add-linear-regression-column-to-pandas-dataframe/52068085#52068085

# Someone asked an unusual question on Stack Overflow about cumulatively applying linear regression to a dataframe.
# He deleted the question (I don't think this operation is very useful), but I figured out a way to do it here:
# Pretty wacky
from io import StringIO
import pandas as pd
import numpy as np

# Sample data from the Stack Overflow question. The header row names only two
# columns ('a' and 'b') while every data row carries three fields, so pandas
# takes the leading field of each row as the row label.
df = pd.read_table(StringIO("""     a    b
0  6.0  0.6
1  1.0  0.3
2  3.0  0.8
3  5.0  0.1
4  7.0  0.4
5  2.0  0.2
6  0.0  0.9
7  4.0  0.7
8  8.0  0.0
9  9.0  0.5
10 10.0 0.4
11 11.0 0.35
12 12.0 0.3
13 13.0 0.28
14 14.0 0.27
15 15.0 0.22"""), sep=r'\s+')  # raw string: '\s' is not a valid str escape
# Order the rows by 'a'; the original row labels stay attached as the index.
df = df.sort_values(by='a')
# Scatter the raw points and keep the returned axes for later overlays.
ax = df.plot(kind='scatter', x='a', y='b')


def lin_reg(x, m, b):
    """Evaluate the straight line with slope m and intercept b at x."""
    return m * x + b


# Degree-1 least-squares fit over the whole frame; np.polyfit returns the
# coefficients highest power first, i.e. (slope, intercept).
m, b = np.polyfit(df['a'], df['b'], 1)
df['lin'] = lin_reg(df['a'], m, b)
def make_m(x):
    """Return the slope of a degree-1 fit of 'b' against the window x.

    x is a window of 'a' values; the matching targets are the first len(x)
    entries of the module-level df['b'], taken by position.
    """
    # NOTE(review): a one-element window leaves a line underdetermined --
    # confirm what the first expanding window is expected to produce.
    targets = df['b'].iloc[:len(x)]
    slope, _intercept = np.polyfit(x, targets, 1)
    return slope
def make_b(x):
    """Return the intercept of a degree-1 fit of 'b' against the window x.

    x is a window of 'a' values; the matching targets are the first len(x)
    entries of the module-level df['b'], taken by position.
    """
    # NOTE(review): a one-element window leaves a line underdetermined --
    # confirm what the first expanding window is expected to produce.
    targets = df['b'].iloc[:len(x)]
    _slope, intercept = np.polyfit(x, targets, 1)
    return intercept
# Refit on every growing prefix of 'a' and evaluate each prefix's line at the
# prefix's last point.
windows = df['a'].expanding()
slopes = windows.apply(make_m, raw=True)
intercepts = windows.apply(make_b, raw=True)
df['new'] = slopes * df['a'] + intercepts
# Overlay the whole-frame fit, then the expanding fit, on the scatter axes.
ax.plot(df['a'], df['lin'])
ax.plot(df['a'], df['new'])
	# UPDATE: He re-added the question here: https://stackoverflow.com/questions/52048919/how-to-incrementally-add-linear-regression-column-to-pandas-dataframe/52068085#52068085

	# Someone asked an unusual question on Stack Overflow about cumulatively applying linear regression to a dataframe.
	# He deleted the question (I don't think this operation is very useful), but I figured out a way to do it here:
	# Pretty wacky
	from io import StringIO
	import pandas as pd
	import numpy as np

	# Sample data from the Stack Overflow question. The header row names only
	# two columns ('a' and 'b') while every data row carries three fields, so
	# pandas takes the leading field of each row as the row label.
	df = pd.read_table(StringIO(""" a b
	0 6.0 0.6
	1 1.0 0.3
	2 3.0 0.8
	3 5.0 0.1
	4 7.0 0.4
	5 2.0 0.2
	6 0.0 0.9
	7 4.0 0.7
	8 8.0 0.0
	9 9.0 0.5
	10 10.0 0.4
	11 11.0 0.35
	12 12.0 0.3
	13 13.0 0.28
	14 14.0 0.27
	15 15.0 0.22"""), sep=r'\s+')  # raw string: '\s' is not a valid str escape
	# Order the rows by 'a'; the original row labels stay attached as the index.
	df = df.sort_values(by='a')
	# Scatter the raw points and keep the returned axes for later overlays.
	ax = df.plot(kind='scatter', x='a', y='b')

	def lin_reg(x, m, b):
		"""Evaluate the straight line with slope m and intercept b at x."""
		return m * x + b

	# Degree-1 least-squares fit over the whole frame; np.polyfit returns the
	# coefficients highest power first, i.e. (slope, intercept).
	m, b = np.polyfit(df['a'], df['b'], 1)
	df['lin'] = lin_reg(df['a'], m, b)
	def make_m(x):
		"""Return the slope of a degree-1 fit of 'b' against the window x.

		x is a window of 'a' values; the matching targets are the first
		len(x) entries of df['b'], taken by position.
		"""
		# NOTE(review): a one-element window leaves a line underdetermined --
		# confirm what the first expanding window is expected to produce.
		y = df['b'].iloc[0:len(x)]
		# np.polyfit returns coefficients highest power first: [slope, intercept].
		return np.polyfit(x, y, 1)[0]
	def make_b(x):
		"""Return the intercept of a degree-1 fit of 'b' against the window x.

		x is a window of 'a' values; the matching targets are the first
		len(x) entries of df['b'], taken by position.
		"""
		# NOTE(review): a one-element window leaves a line underdetermined --
		# confirm what the first expanding window is expected to produce.
		y = df['b'].iloc[0:len(x)]
		# np.polyfit returns coefficients highest power first: [slope, intercept].
		return np.polyfit(x, y, 1)[1]
	# Refit on every growing prefix of 'a' and evaluate each prefix's line at
	# the prefix's last point.
	windows = df['a'].expanding()
	slopes = windows.apply(make_m, raw=True)
	intercepts = windows.apply(make_b, raw=True)
	df['new'] = slopes * df['a'] + intercepts
	# Overlay the whole-frame fit, then the expanding fit, on the scatter axes.
	ax.plot(df['a'], df['lin'])
	ax.plot(df['a'], df['new'])