Last active
October 21, 2019 03:10
-
-
Save NaimKabir/4a873db29e59ad34806b05511c83ff7d to your computer and use it in GitHub Desktop.
Simulating a randomized controlled trial
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from itertools import product\n", | |
"from sklearn.preprocessing import normalize" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Participant simulation\n", | |
"\n", | |
"First we'll create *n* participants in our experiment." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"n = 10000" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"For each of these participants, we'll randomly assign them some (Compliance, Response) behavior combination." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"compliance_partitions = [\n", | |
" \"always_taker\",\n", | |
" \"complier\",\n", | |
" \"defier\",\n", | |
" \"never_taker\",\n", | |
"]\n", | |
"response_partitions = [\n", | |
" \"always_better\",\n", | |
" \"helped\",\n", | |
" \"hurt\",\n", | |
" \"never_better\",\n", | |
"]\n", | |
"\n", | |
"# Take the cross product of these sets of types\n", | |
"\n", | |
"partition_types = product(\n", | |
" compliance_partitions, response_partitions\n", | |
")\n", | |
"partition_types = np.array(list(partition_types))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# We'll simulate probabilities that our participants\n", | |
"# will belong to one of the 16 possible behavior combinations\n", | |
"\n", | |
"# I'm setting up a contrived example\n", | |
"# to prove my point with these arbitrary floats--\n", | |
"# trust me it's instructive!\n", | |
"\n", | |
"arbitrary_floats = np.array(\n", | |
" [\n", | |
" [\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0.01,\n", | |
" 0,\n", | |
" 0.14,\n", | |
" 0,\n", | |
" 0.16,\n", | |
" 0.32,\n", | |
" 0,\n", | |
" 0.31,\n", | |
" 0,\n", | |
" 0.04,\n", | |
" 0.02,\n", | |
" 0,\n", | |
" 0,\n", | |
" ]\n", | |
" ]\n", | |
")\n", | |
"partition_probabilities = normalize(\n", | |
" arbitrary_floats, \"l1\"\n", | |
")\n", | |
"partition_probabilities = (\n", | |
" partition_probabilities.flatten()\n", | |
")\n", | |
"\n", | |
"# to be a true set of probabilities, the vector sum\n", | |
"# needs to be 1\n", | |
"\n", | |
"# sometimes this can fail because of precision errors,\n", | |
"# so let's assert it\n", | |
"\n", | |
"assert partition_probabilities.sum() == 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# drawing participant compliance and response behaviors according to the\n", | |
"# specified distribution\n", | |
"\n", | |
"participant_partition = np.random.choice(\n", | |
" range(len(partition_types)),\n", | |
" n,\n", | |
" p=partition_probabilities,\n", | |
")\n", | |
"\n", | |
"compliance_response_pairs = zip(\n", | |
" *partition_types[participant_partition]\n", | |
")\n", | |
"compliance_type, response_type = list(\n", | |
" compliance_response_pairs\n", | |
")\n", | |
"\n", | |
"# assigning participants to Control and Treatment groups\n", | |
"# with 50% probability\n", | |
"\n", | |
"assignments = [\"control\", \"treatment\"]\n", | |
"participant_assignment = np.random.choice(\n", | |
" assignments, n\n", | |
")\n", | |
"\n", | |
"# compiling all information into a dataframe\n", | |
"# that simulates the participants\n", | |
"\n", | |
"df = pd.DataFrame(\n", | |
" {\n", | |
" \"assignment\": participant_assignment,\n", | |
" \"compliance_type\": compliance_type,\n", | |
" \"response_type\": response_type,\n", | |
" }\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Simulate whether participants took treatment\n", | |
"\n", | |
"Depending on assignment and compliance type, we can simulate whether or not each participant took the treatment." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# if the participant is an always_taker,\n", | |
"# they'll always take the treatment.\n", | |
"\n", | |
"df[\"took_treatment\"] = (\n", | |
" df.compliance_type == \"always_taker\"\n", | |
")\n", | |
"\n", | |
"# if they're a complier, they'll take the treatment\n", | |
"# as long as they're in the treatment condition.\n", | |
"\n", | |
"df[\"took_treatment\"] = df[\"took_treatment\"] | (\n", | |
" (df.compliance_type == \"complier\")\n", | |
" & (df.assignment == \"treatment\")\n", | |
")\n", | |
"\n", | |
"# if they're a defier, they'll only take the treatment\n", | |
"# if they were in the control condition.\n", | |
"\n", | |
"df[\"took_treatment\"] = df[\"took_treatment\"] | (\n", | |
" (df.compliance_type == \"defier\")\n", | |
" & (df.assignment == \"control\")\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Simulate Outcomes\n", | |
"\n", | |
"Now we can simulate outcomes from the experiment.\n", | |
"\n", | |
"Depending on whether they took the treatment and their `response_type`, did they end up in a Good or Bad state after the experiment's conclusion?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# if the participant is of the always_better type,\n", | |
"# they'll definitely have a good outcome.\n", | |
"\n", | |
"df[\"good_outcome\"] = (\n", | |
" df.response_type == \"always_better\"\n", | |
")\n", | |
"\n", | |
"# if the participant is of the 'helped' type,\n", | |
"# they'll have a good outcome as long as they\n", | |
"# took treatment.\n", | |
"\n", | |
"df[\"good_outcome\"] = df[\"good_outcome\"] | (\n", | |
" (df.response_type == \"helped\")\n", | |
" & (df.took_treatment)\n", | |
")\n", | |
"\n", | |
"# if the participant is of the 'hurt' type,\n", | |
"# they'll have a good outcome as long as they\n", | |
"# did NOT take the treatment!\n", | |
"\n", | |
"df[\"good_outcome\"] = df[\"good_outcome\"] | (\n", | |
" (df.response_type == \"hurt\")\n", | |
" & (~df.took_treatment)\n", | |
")\n", | |
"\n", | |
"# Otherwise, the outcome is going to be bad\n", | |
"# and the column will have a False value." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We can now observe the probabilities of each (Treatment, Outcome) combination that would emerge, conditional on the assignment." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df[\"n\"] = 1\n", | |
"results = (\n", | |
" df.groupby(\n", | |
" [\n", | |
" \"assignment\",\n", | |
" \"took_treatment\",\n", | |
" \"good_outcome\",\n", | |
" ]\n", | |
" )\n", | |
" .count()\n", | |
" .n\n", | |
")\n", | |
"results = results.to_frame()\n", | |
"results[\"assignment_n\"] = results.groupby(\n", | |
" \"assignment\"\n", | |
").transform(\"sum\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>P( X, Y | Z )</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>assignment</th>\n", | |
" <th>took_treatment</th>\n", | |
" <th>good_outcome</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">control</th>\n", | |
" <th rowspan=\"2\" valign=\"top\">False</th>\n", | |
" <th>False</th>\n", | |
" <td>0.315285</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>True</th>\n", | |
" <td>0.043756</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">True</th>\n", | |
" <th>False</th>\n", | |
" <td>0.323676</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>True</th>\n", | |
" <td>0.317283</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">treatment</th>\n", | |
" <th rowspan=\"2\" valign=\"top\">False</th>\n", | |
" <th>False</th>\n", | |
" <td>0.019820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>True</th>\n", | |
" <td>0.670671</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">True</th>\n", | |
" <th>False</th>\n", | |
" <td>0.167968</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>True</th>\n", | |
" <td>0.141542</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" P( X, Y | Z )\n", | |
"assignment took_treatment good_outcome \n", | |
"control False False 0.315285\n", | |
" True 0.043756\n", | |
" True False 0.323676\n", | |
" True 0.317283\n", | |
"treatment False False 0.019820\n", | |
" True 0.670671\n", | |
" True False 0.167968\n", | |
" True 0.141542" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"p_states = results.n / results.assignment_n\n", | |
"p_states = p_states.rename(\"P( X, Y | Z )\")\n", | |
"\n", | |
"# display\n", | |
"p_states.to_frame()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment