Skip to content

Instantly share code, notes, and snippets.

@DexGroves
Created August 17, 2020 10:04
Show Gist options
  • Save DexGroves/f41331c2f7029d8adc2d336b4342fe03 to your computer and use it in GitHub Desktop.
Save DexGroves/f41331c2f7029d8adc2d336b4342fe03 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
N = 10_000
np.random.seed(42)
df = pd.DataFrame(index=np.arange(N))
df['x2'] = np.random.choice([1,2,3], size=N)
df['x1'] = np.random.normal(size=N)
df['y'] = np.nan
df.loc[df.x2 == 1, 'y'] = df.x1 * 0
df.loc[df.x2 == 2, 'y'] = df.x1 * 0
df.loc[df.x2 == 3, 'x1'] = df.loc[df.x2 == 3, 'x1'] / 10
df.loc[df.x2 == 3, 'y'] = df.x1 * 10
df.y += np.random.normal(scale=0.1, size=N)
flm = LinearRegression()
flm.fit(X=df[['x1', 'x2']], y=df.y)
intercept, coef = flm.intercept_, flm.coef_
lms = {}
for x2, grp in df.groupby('x2'):
lm = LinearRegression()
lm.fit(X=grp[['x1']], y=grp.y)
lms[x2] = dict(N=len(grp), model=lm)
# Partial regression coefficient for X1
coef[0]
# Weighted average of individual regression coefficients on X1
sum([lm['model'].coef_ * lm['N'] for lm in lms.values()]) / N
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment