Skip to content

Instantly share code, notes, and snippets.

@tkazusa
Last active December 2, 2018 07:16
Show Gist options
  • Save tkazusa/be15f0994b2f03cabbd2de2ef5d58f1a to your computer and use it in GitHub Desktop.
Save tkazusa/be15f0994b2f03cabbd2de2ef5d58f1a to your computer and use it in GitHub Desktop.
def negative_down_sampling(data, random_state, target_variable):
    """Balance a binary dataset by down-sampling the negative class.

    All rows whose ``target_variable`` equals 1 are kept. Rows whose
    ``target_variable`` equals 0 are randomly sampled (without replacement)
    so that at most as many negatives as positives remain. Rows with any
    other target value are not part of the result.

    Args:
        data: pandas DataFrame containing the column ``target_variable``.
        random_state: Seed (or random state object) forwarded to
            ``DataFrame.sample`` for reproducible sampling.
        target_variable: Name of the label column (1 = positive,
            0 = negative).

    Returns:
        A DataFrame made of the positive rows followed by the sampled
        negative rows; original index labels are preserved.
    """
    labels = data[target_variable]
    positive_data = data[labels == 1]
    negative_pool = data[labels == 0]

    # Sample by explicit count instead of a ratio-derived fraction: this
    # avoids dividing by zero when there are no negatives, and never asks
    # pandas for more rows than exist when positives outnumber negatives
    # (in that case every negative row is kept).
    n_negative = min(len(positive_data), len(negative_pool))
    negative_data = negative_pool.sample(
        n=n_negative, random_state=random_state)

    return pd.concat([positive_data, negative_data])
def multi_class_downsampling(data, random_state, target_variable,
                             classes=(0, 1, 2), reference_class=3):
    """Down-sample several classes to the size of a reference class.

    Every row of ``reference_class`` is kept. Each class listed in
    ``classes`` is randomly sampled (without replacement) down to the number
    of reference rows; a class that already has fewer rows than the
    reference class is kept in full. Rows whose label is neither in
    ``classes`` nor equal to ``reference_class`` are not part of the result.

    Args:
        data: pandas DataFrame containing the column ``target_variable``.
        random_state: Seed (or random state object) forwarded to every
            ``DataFrame.sample`` call.
        target_variable: Name of the label column.
        classes: Labels to down-sample, processed in the given order.
            Defaults to ``(0, 1, 2)``.
        reference_class: Label whose row count is the target size.
            Defaults to ``3``.

    Returns:
        A DataFrame with the sampled rows of each class in ``classes``
        (in order) followed by all rows of ``reference_class``; original
        index labels are preserved.
    """
    labels = data[target_variable]
    reference_rows = data[labels == reference_class]
    target_size = len(reference_rows)

    parts = []
    for label in classes:
        if label == reference_class:
            # The reference rows are appended once below; skipping here
            # prevents them from being duplicated in the output.
            continue
        class_rows = data[labels == label]
        # Sampling by count (capped at the class size) instead of by a
        # ratio of ratios avoids ZeroDivisionError for an absent class and
        # ValueError when a class is smaller than the reference class.
        n_rows = min(len(class_rows), target_size)
        parts.append(class_rows.sample(n=n_rows, random_state=random_state))

    parts.append(reference_rows)
    return pd.concat(parts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment