Skip to content

Instantly share code, notes, and snippets.

@thistleknot
Last active May 8, 2021
Embed
What would you like to do?
Python backward stepwise regression with ZCA whitening
# Whiten the predictor columns (every column after the first) with ZCA,
# keep their original labels, and relabel the rows with all_data's index.
predictors = transformed.iloc[:, 1:]
trf = zca.ZCA().fit(predictors)
X = pd.DataFrame(trf.transform(predictors))
X.columns = predictors.columns
X.index = all_data.index

# Standardise the response (first column): subtract its mean and divide by
# its standard deviation, keeping it as a one-column frame with the same
# column label and the same row index as X.
response = transformed.iloc[:, 0]
y = pd.DataFrame((response - np.mean(response)) / np.std(response))
y.columns = pd.DataFrame(response).columns
y.index = all_data.index
# Backward stepwise elimination.
# Repeatedly fit OLS of y on the whitened predictors (plus an intercept),
# drop the predictor with the largest p-value while that p-value exceeds .05,
# and re-whiten the remaining raw columns before the next fit.
max_pvalue = 1
New_Names = X.columns
X_b = X
while (max_pvalue > .05):
    model = sm.OLS(y, sm.tools.tools.add_constant(X_b, prepend=True, has_constant='skip'))
    results = model.fit()
    set_ = X_b.columns.tolist()
    # Select the predictors' p-values by label instead of slicing off
    # position 0: with has_constant='skip' an intercept column is not always
    # added, so the first entry is not guaranteed to be the constant.
    predictor_pvalues = results.pvalues.loc[set_]
    # Series.max / Series.idxmax skip NaN consistently, unlike builtin max().
    max_pvalue = predictor_pvalues.max()
    if (max_pvalue > .05):
        print(max_pvalue)
        # A Series has a single axis; idxmax(axis=1) raises on current pandas.
        max_pname = predictor_pvalues.idxmax()
        set_.remove(max_pname)
        New_Names = set_
        if not New_Names:
            # Every predictor was eliminated. ZCA cannot be fit on zero
            # columns, so fall back to an intercept-only model and stop.
            X_b = pd.DataFrame(index=X.index)
            model = sm.OLS(y, pd.DataFrame({'const': 1.0}, index=X.index))
            results = model.fit()
            break
        kept = transformed[New_Names]
        trf = zca.ZCA().fit(kept)
        X_b = pd.DataFrame(trf.transform(kept))
        X_b.columns = kept.columns
        # Reuse X's row labels so X_b stays aligned with y for the next fit
        # and for any later column-wise concat with y.
        X_b.index = X.index
# Summary of the last fitted model, i.e. the one on the final predictor set.
print(results.summary())
# Assemble the output table: the response column(s) of y first, followed by
# the predictor columns currently held in X_b.
data = pd.concat([y,X_b],axis=1)
print(data.columns)
data.index = all_data.index
# Build the destination from Path and filename so that editing either one
# actually changes where the CSV is written (previously both were unused and
# the path was duplicated as a literal). The row index is not written.
Path="/mnt/distvol"
filename="data"
data.to_csv("{}/{}.csv".format(Path, filename),index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment