Skip to content

Instantly share code, notes, and snippets.

@wolframalpha
Created November 27, 2019 06:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wolframalpha/ac8cb03e69efbb78d72fd878e994f856 to your computer and use it in GitHub Desktop.
Save wolframalpha/ac8cb03e69efbb78d72fd878e994f856 to your computer and use it in GitHub Desktop.
Variable transformation and importance using OLS/regression
import numpy as np
import pandas as pd
import statsmodels.api as sm
# Candidate transformations tried on each predictor, keyed by the label that
# ends up in the 'transformation' column of the results. Insertion order is
# preserved and determines the order of the result rows.
transformations = {
    'log': np.log,
    'sqrt': np.sqrt,
    'sqr': lambda x: np.power(x, 2),
    'cube': lambda x: np.power(x, 3),
    # Cube root. The previous np.power(x, -3) computed 1 / x**3 instead.
    # np.cbrt is also defined for negative inputs, unlike np.power(x, 1 / 3).
    'cubert': np.cbrt,
    'original': lambda x: x,
}
def run_transformation(df, columns_to_tranform, transformations, y_col):
    """Fit one single-predictor OLS model per (column, transformation) pair.

    For every column in ``columns_to_tranform`` and every entry of
    ``transformations``, the column is transformed via ``Series.apply``,
    used as the only regressor (plus the constant added by
    ``sm.add_constant``) against ``df[y_col]``, and the coefficient table
    of the fitted model's summary is collected.

    Args:
        df: DataFrame holding the predictor columns and ``y_col``.
        columns_to_tranform: Iterable of column names in ``df`` to evaluate.
        transformations: Mapping of transformation name to a callable that
            is applied to each column.
        y_col: Name of the response column; its values are cast to float.

    Returns:
        A DataFrame concatenating, for each fit, the rows of
        ``summary().tables[1]`` plus three extra columns: ``r2``,
        ``transformation`` and ``variable``. Cell values coming from the
        summary tables are stored exactly as the tables provide them (they
        are not re-parsed here). An empty DataFrame is returned when there
        is no (column, transformation) pair to fit.
    """
    y = df[y_col].values.astype(float)
    frames = []
    for col in columns_to_tranform:
        for trans_name, trans in transformations.items():
            transformed = df[col].apply(trans)
            # Transformations such as log can yield inf/-inf/NaN; those are
            # substituted with a small constant (presumably so the fit only
            # sees finite values -- NOTE(review): this biases the fit for
            # columns with many such entries).
            cleaned = transformed.replace([np.inf, -np.inf, np.nan], 1e-6)
            X = sm.add_constant(cleaned.values.reshape(-1, 1))
            summary = sm.OLS(y, X).fit().summary()

            # tables[1] is the coefficient table: first row is the header,
            # the remaining rows are one per model term.
            header, *rows = summary.tables[1].data
            df_results = pd.DataFrame(rows, columns=header)
            # Last cell of the first row of the top summary table
            # (presumably the R-squared value, given the column name).
            df_results['r2'] = summary.tables[0].data[0][-1]
            df_results['transformation'] = trans_name
            df_results['variable'] = col
            frames.append(df_results)

    # pd.concat raises ValueError on an empty list; return an empty frame
    # instead when nothing was fitted.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0, ignore_index=True)
# Evaluate every transformation of every candidate column against the
# 'VGO T 95' response. NOTE(review): df_merged and columns_to_tranform are
# not defined in the visible part of this script -- they must exist before
# this line runs.
results = run_transformation(
    df=df_merged,
    columns_to_tranform=columns_to_tranform,
    transformations=transformations,
    y_col='VGO T 95',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment