Skip to content

Instantly share code, notes, and snippets.

@dradecic
Created September 9, 2019 12:51
Show Gist options
  • Save dradecic/52d8b2b2213dd3d46f4b75f85c1183f2 to your computer and use it in GitHub Desktop.
Save dradecic/52d8b2b2213dd3d46f4b75f85c1183f2 to your computer and use it in GitHub Desktop.
ara_2_woe_iv_function
def calculate_woe_iv(dataset, feature, target):
    """Compute Weight of Evidence (WoE) and Information Value (IV) for a feature.

    For every distinct non-missing value of ``dataset[feature]`` the rows are
    counted in total, with ``target == 0`` ("Good") and with ``target == 1``
    ("Bad"). From those counts the per-value share of all goods and of all
    bads is derived, then ``WoE = ln(Distr_Good / Distr_Bad)`` and
    ``IV = (Distr_Good - Distr_Bad) * WoE``.

    Args:
        dataset: pandas DataFrame containing both columns.
        feature: label of the column whose distinct values form the bins.
        target: label of the binary column (0 counted as good, 1 as bad).

    Returns:
        A ``(dset, iv)`` tuple. ``dset`` is a DataFrame with the columns
        ``Value, All, Good, Bad, Distr_Good, Distr_Bad, WoE, IV``, one row
        per distinct feature value, sorted ascending by ``WoE``. ``iv`` is
        the sum of the ``IV`` column. When the feature has no non-missing
        values, an empty frame with those columns and ``0.0`` are returned.

    Notes:
        Missing feature values are ignored: they never compare equal to
        anything, so they cannot form a bin.
        An infinite WoE (a bin with zero goods or zero bads) is replaced by
        0, which also makes that bin contribute 0 to the IV.
    """
    feature_col = dataset[feature]
    # The target masks do not depend on the bin, so build them only once.
    is_good = dataset[target] == 0
    is_bad = dataset[target] == 1

    rows = []
    # dropna() keeps the iterated values in line with what can actually be
    # matched by ``==``; unique() preserves order of first appearance.
    for val in feature_col.dropna().unique():
        in_bin = feature_col == val
        rows.append({
            'Value': val,
            'All': in_bin.sum(),
            'Good': (in_bin & is_good).sum(),
            'Bad': (in_bin & is_bad).sum(),
        })

    if not rows:
        # Nothing to bin (empty frame or all-missing feature).
        empty = pd.DataFrame(columns=[
            'Value', 'All', 'Good', 'Bad',
            'Distr_Good', 'Distr_Bad', 'WoE', 'IV',
        ])
        return empty, 0.0

    dset = pd.DataFrame(rows)
    dset['Distr_Good'] = dset['Good'] / dset['Good'].sum()
    dset['Distr_Bad'] = dset['Bad'] / dset['Bad'].sum()

    # log(0) and log(inf) are expected for bins lacking goods or bads and are
    # neutralised right below, so silence the corresponding numpy warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        dset['WoE'] = np.log(dset['Distr_Good'] / dset['Distr_Bad'])
    dset['WoE'] = dset['WoE'].replace([np.inf, -np.inf], 0)

    dset['IV'] = (dset['Distr_Good'] - dset['Distr_Bad']) * dset['WoE']
    iv = dset['IV'].sum()

    dset = dset.sort_values(by='WoE')
    return dset, iv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment