Skip to content

Instantly share code, notes, and snippets.

@neldivad
Last active February 15, 2023 05:23
Show Gist options
  • Save neldivad/5a1f847feee02b868aeb09a320e13075 to your computer and use it in GitHub Desktop.
Save neldivad/5a1f847feee02b868aeb09a320e13075 to your computer and use it in GitHub Desktop.
Get VIF plots
def get_numeric_col_list(df):
    """Return the labels of the NumPy-backed integer and float columns of *df*.

    Args:
        df: pandas DataFrame to inspect.

    Returns:
        list: Column labels, in ``df.columns`` order, whose dtype is a NumPy
        signed-integer, unsigned-integer or floating-point dtype of any
        width.  Boolean, complex, datetime/timedelta, object and pandas
        extension dtypes (including nullable ``Int64``/``Float64``) are not
        reported.
    """
    import numpy as np

    # dtype.kind is "i" (signed int), "u" (unsigned int) or "f" (float)
    # regardless of width, so narrow types such as int8/uint16/float16 are
    # recognised alongside the usual 32/64-bit ones.
    numeric_kinds = frozenset("iuf")

    # Pair labels with df.dtypes instead of indexing df[col]: no column is
    # materialised, and duplicated labels do not break the lookup.
    return [
        col
        for col, dtype in zip(df.columns, df.dtypes)
        if isinstance(dtype, np.dtype) and dtype.kind in numeric_kinds
    ]
def get_categorical_col_list(df):
    """Return the labels of the categorical-like columns of *df*.

    A column is reported when its dtype is ``object``, a pandas categorical,
    a pandas string dtype, or boolean (NumPy ``bool`` or nullable
    ``boolean``).

    Args:
        df: pandas DataFrame to inspect.

    Returns:
        list: Matching column labels, in ``df.columns`` order.
    """
    import pandas as pd
    from pandas.api.types import is_bool_dtype, is_object_dtype

    def _is_categorical_like(dtype):
        # Use pandas' own dtype predicates rather than comparing the dtype
        # against strings: string comparison depends on how the installed
        # NumPy handles names it does not know (e.g. "category").
        return (
            isinstance(dtype, (pd.CategoricalDtype, pd.StringDtype))
            or is_object_dtype(dtype)
            or is_bool_dtype(dtype)
        )

    # Iterate df.dtypes alongside the labels so duplicated labels are fine.
    return [
        col
        for col, dtype in zip(df.columns, df.dtypes)
        if _is_categorical_like(dtype)
    ]
def get_nan_cols(df, threshold):
    """Return the columns whose share of missing values exceeds *threshold*.

    Args:
        df: pandas DataFrame to inspect.
        threshold: Fraction of rows (e.g. ``0.2`` for 20%).  The comparison
            is strict, so a column sitting exactly at the threshold is not
            reported.

    Returns:
        list: Column labels, in ``df.columns`` order, whose missing-value
        fraction is greater than ``threshold``.  A frame with zero rows
        yields an empty list.
    """
    # With no rows the missing fraction is 0/0; report nothing rather than
    # dividing by zero.
    if len(df) == 0:
        return []

    # The mean of the boolean mask is the per-column missing fraction,
    # computed for all columns at once (and safe with duplicated labels).
    missing_fraction = df.isna().mean()
    return [
        col
        for col, fraction in zip(df.columns, missing_fraction)
        if fraction > threshold
    ]
def _compute_vif_table(df, nan_threshold=0.2):
    """Build a ``feature`` / ``VIF`` table for the numeric columns of *df*."""
    import pandas as pd
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    # Drop numeric columns that have too many missing values to be useful.
    sparse_cols = set(get_nan_cols(df, nan_threshold))
    numerics = [col for col in get_numeric_col_list(df) if col not in sparse_cols]

    # Remaining gaps are filled with 0 so the VIF computation can run.
    # The matrix is built once, as float64, instead of once per column.
    # NOTE(review): no constant column is added to the design matrix —
    # confirm whether VIFs without an intercept are what is intended.
    design = df[numerics].fillna(0).to_numpy(dtype=float)

    vif_values = []
    for idx in range(design.shape[1]):
        try:
            vif_values.append(float(variance_inflation_factor(design, idx)))
        except Exception:
            # Best effort: a column whose VIF cannot be computed is kept in
            # the table with a VIF of 0 instead of aborting the whole plot.
            vif_values.append(0.0)

    # Build the frame in one go; writing cells through chained indexing
    # (frame["VIF"][i] = ...) is not reliable and is a no-op under pandas
    # Copy-on-Write.
    return pd.DataFrame(
        {"feature": numerics, "VIF": pd.Series(vif_values, dtype="float64")}
    )


def get_VIF_plot(df, nan_threshold=0.2):
    """Return a plotly bar chart of the variance inflation factor per feature.

    Numeric columns are taken from ``get_numeric_col_list``; those whose
    missing-value fraction exceeds ``nan_threshold`` (per ``get_nan_cols``)
    are left out.  Remaining missing values are replaced with 0 before each
    column's VIF is computed with statsmodels.

    Args:
        df: pandas DataFrame holding the candidate features.
        nan_threshold: Maximum tolerated fraction of missing values per
            column.  Defaults to ``0.2`` (20%).

    Returns:
        A plotly figure with one bar per feature, a logarithmic y axis, no
        legend and no x-axis title.  Features whose VIF could not be
        computed carry a value of 0.
    """
    import plotly.express as px

    vif_table = _compute_vif_table(df, nan_threshold)

    fig = px.bar(
        vif_table,
        x='feature',
        y='VIF',
        color='feature',
        log_y=True,
    )
    fig.update_layout(showlegend=False)
    fig.update_xaxes(title='')
    return fig
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment