Skip to content

Instantly share code, notes, and snippets.

@joshreini1
Created March 1, 2023 16:23
Show Gist options
  • Save joshreini1/79b438af170e502e28c8b335ee24fb30 to your computer and use it in GitHub Desktop.
Save joshreini1/79b438af170e502e28c8b335ee24fb30 to your computer and use it in GitHub Desktop.
def rebalance_gender(df, data_type):
    """Resample the female/``PINCP == True`` rows to match the male count.

    Rows that are female with ``PINCP == True`` are drawn with replacement
    until there are as many of them as there are male rows with
    ``PINCP == True``. Every other row of ``df`` is kept as it is.

    Args:
        df: DataFrame holding a ``PINCP`` column plus either a ``Sex`` column
            with ``'Female'``/``'Male'`` values (``data_type == 0``) or
            ``Sex_Female``/``Sex_Male`` columns compared against ``1``
            (any other ``data_type``).
        data_type: ``0`` selects the ``Sex`` column layout; any other value
            selects the ``Sex_Female``/``Sex_Male`` layout.

    Returns:
        A new DataFrame: the resampled female/True rows first, followed by
        all remaining rows of ``df``.
    """
    # Pick which column/value pair identifies each gender for this layout.
    if data_type == 0:
        female_col, female_val = 'Sex', 'Female'
        male_col, male_val = 'Sex', 'Male'
    else:
        female_col, female_val = 'Sex_Female', 1
        male_col, male_val = 'Sex_Male', 1

    # Split the frame into the segment to resample and everything else.
    female_positive = (df[female_col] == female_val) & (df['PINCP'] == True)
    target_segment = df[female_positive]
    remainder = df[~female_positive]

    # The male/True segment size is the target size for the female segment.
    male_positive = (df[male_col] == male_val) & (df['PINCP'] == True)
    target_size = len(df[male_positive])

    # NOTE(review): `resample` comes from outside this block (presumably
    # sklearn.utils.resample -- confirm against the file's imports).
    # The seed is fixed so the same sampling is performed on each data set.
    rebalanced_segment = resample(
        target_segment,
        replace=True,
        n_samples=target_size,
        random_state=1,
    )
    return pd.concat([rebalanced_segment, remainder])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment