Skip to content

Instantly share code, notes, and snippets.

@stasSajin
Created June 25, 2020 20:42
Show Gist options
  • Save stasSajin/f325b7585c4df87e0566f912d4edbe16 to your computer and use it in GitHub Desktop.
Save stasSajin/f325b7585c4df87e0566f912d4edbe16 to your computer and use it in GitHub Desktop.
Blogpost generate_data
def generate_data(group_size, effect_size=0, *, seed=None):
    """Simulate a two-group experiment and return it as a DataFrame.

    The frame has ``2 * group_size`` rows, half of them randomly assigned
    to group 1 and half to group 0, with these columns:

    * ``group``   -- 1 or 0, shuffled so assignment is random per row.
    * ``minutes`` -- draws from Normal(25, 20), truncated to integers and
      floored at 0; rows in group 1 additionally get ``effect_size`` added.
    * ``income``  -- a Normal(10000, 2000) draw times the row's baseline
      minutes, truncated to an integer.
    * ``assets``  -- a Normal(100000, 20000) draw times the row's baseline
      minutes, truncated to an integer.

    ``income`` and ``assets`` are derived from ``minutes`` *before* the
    effect is applied, so they never depend on the group assignment.

    Args:
        group_size: Number of rows in each of the two groups.
        effect_size: Amount added to ``minutes`` for every row in group 1.
            When it is not an integer type, ``minutes`` is returned as a
            float column so the fractional part is kept.
        seed: Optional seed for reproducible output. When ``None``
            (default) the global ``np.random`` state is used, exactly as
            before. ``generate_data(n, e, seed=s)`` yields the same frame
            as calling ``np.random.seed(s)`` followed by
            ``generate_data(n, e)``.

    Returns:
        pandas.DataFrame with columns ``group``, ``minutes``, ``income``
        and ``assets``.
    """
    # Fall back to the module-level legacy generator so callers that seed
    # it with ``np.random.seed`` keep getting the same draws.
    rng = np.random if seed is None else np.random.RandomState(seed)
    n_rows = 2 * group_size

    # The order of the random draws below is part of the reproducible
    # behavior: minutes, income, assets, then the shuffle.
    minutes = np.maximum(0, rng.normal(25, 20, size=n_rows).astype(int))
    income = (rng.normal(10000, 2000, size=n_rows) * minutes).astype(int)
    assets = (rng.normal(100000, 20000, size=n_rows) * minutes).astype(int)

    # group_size ones followed by group_size zeros, then shuffled in place.
    group_assignment = np.repeat([1, 0], group_size)
    rng.shuffle(group_assignment)

    data = pd.DataFrame({
        'group': group_assignment,
        'minutes': minutes,
        'income': income,
        'assets': assets,
    })

    # ``minutes`` is an integer column. Writing fractional values into it
    # relies on implicit upcasting, which newer pandas versions warn about
    # and plan to reject, so widen the column explicitly first.
    if not np.issubdtype(np.asarray(effect_size).dtype, np.integer):
        data["minutes"] = data["minutes"].astype(float)

    data.loc[data["group"] == 1, "minutes"] += effect_size
    return data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment