Skip to content

Instantly share code, notes, and snippets.

@ghl3
Last active August 29, 2015 14:08
Show Gist options
  • Save ghl3/ef82120ba87dac196e25 to your computer and use it in GitHub Desktop.
Save ghl3/ef82120ba87dac196e25 to your computer and use it in GitHub Desktop.
Calculate the rate at which the jth column is true, given that the ith column is true
def conditional_rate(df: pandas.DataFrame) -> pandas.DataFrame:
    """Return the pairwise conditional "true" rates of boolean columns.

    Given a dataframe whose columns are all boolean valued, build a
    square data frame of size ``n_columns * n_columns`` (indexed and
    labelled by ``df.columns``) where the entry at row ``i``, column
    ``j`` is the rate at which the jth variable is true given that the
    ith variable is true.

    Entries are 0 when the ith variable is never true (the condition
    is never met) or when the jth variable is never true alongside it.

    Args:
        df: Data frame whose columns are all boolean valued.

    Returns:
        A float-valued data frame with ``df.columns`` as both its index
        and its columns.
    """
    variables = df.columns
    # Start from zeros so that row variables that are never true keep a
    # well-defined rate of 0 instead of NaN.
    res = pandas.DataFrame(0.0, index=variables, columns=variables)
    for row_var in variables:
        # All rows where the given "row" variable is true. This does not
        # depend on the column variable, so it is computed once per row.
        row_true = df[df[row_var]]
        if len(row_true) == 0:
            continue
        # The mean of a boolean column is the fraction of True values,
        # i.e. the conditional rate for every column variable at once.
        # A single .loc write avoids chained assignment, which does not
        # reliably update `res` (and never does under copy-on-write).
        res.loc[row_var] = row_true.mean()
    return res
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment