Skip to content

Instantly share code, notes, and snippets.

@NaimKabir
Created March 18, 2019 17:21
Show Gist options
  • Save NaimKabir/f9ccba1ba05f4cee71cf40555e9e3017 to your computer and use it in GitHub Desktop.
Save NaimKabir/f9ccba1ba05f4cee71cf40555e9e3017 to your computer and use it in GitHub Desktop.
rct simulation
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from itertools import product"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Participant simulation\n",
"\n",
"# Seed NumPy's global random state so that Restart Kernel -> Run All\n",
"# draws the same simulated participants on every run. Every random draw\n",
"# in this notebook goes through the global np.random functions.\n",
"\n",
"SEED = 0\n",
"np.random.seed(SEED)\n",
"\n",
"# we'll create 1000 participants in our experiment\n",
"\n",
"n = 1000"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Behavior partitions: every participant has one compliance behavior and\n",
"# one response behavior.\n",
"\n",
"compliance_partitions = ['always_taker', 'complier', 'defier', 'never_taker']\n",
"response_partitions = ['always_better', 'helped', 'hurt', 'never_better']\n",
"\n",
"# every (compliance, response) pair, one pair per row\n",
"partition_types = np.array(list(product(compliance_partitions, response_partitions)))\n",
"\n",
"# simulate the probability that a participant belongs to each behavior\n",
"# combination: one random weight per combination, normalized to sum to 1.\n",
"# The count comes from partition_types so the two can never disagree.\n",
"\n",
"partition_probabilities = np.random.random(len(partition_types))\n",
"partition_probabilities = partition_probabilities / partition_probabilities.sum()\n",
"\n",
"# to be a true set of probabilities, the vector sum needs to be 1.\n",
"# Floating-point rounding can leave the sum a hair away from 1, so compare\n",
"# with a tolerance rather than with exact equality.\n",
"assert np.isclose(partition_probabilities.sum(), 1.0)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# drawing participant compliance and response behaviors according to the\n",
"# specified distribution\n",
"\n",
"participant_partition = np.random.choice(len(partition_types), n, p=partition_probabilities)\n",
"\n",
"# column 0 of partition_types is the compliance behavior, column 1 the\n",
"# response behavior; transposing before unpacking also works when n == 0\n",
"compliance_type, response_type = partition_types[participant_partition].T\n",
"\n",
"# splitting participants evenly into Control and Treatment groups: the first\n",
"# n // 2 rows are control and the remaining rows are treatment. Behaviors were\n",
"# drawn independently for every row, so row position carries no information.\n",
"# Building the index from arange keeps it exactly n long, for odd n as well.\n",
"\n",
"assignments = np.array(['control', 'treatment'])\n",
"participant_assignment = assignments[(np.arange(n) >= n // 2).astype('int32')]\n",
"\n",
"# compiling all information into our dataframe\n",
"\n",
"df = pd.DataFrame({'assignment': participant_assignment,\n",
"                   'compliance_type': compliance_type,\n",
"                   'response_type': response_type})"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Simulate outcomes\n",
"\n",
"# depending on assignment and compliance type, did the participant take the treatment?\n",
"\n",
"# always_taker: takes the treatment whatever the assignment.\n",
"# complier: takes the treatment only when assigned to treatment.\n",
"# defier: takes the treatment only when assigned to control.\n",
"# never_taker: matches none of the clauses below, so never takes it.\n",
"df['took_treatment'] = (\n",
"    (df.compliance_type == 'always_taker')\n",
"    | ((df.compliance_type == 'complier') & (df.assignment == 'treatment'))\n",
"    | ((df.compliance_type == 'defier') & (df.assignment == 'control'))\n",
")\n",
"\n",
"# depending on whether they took the treatment and their response_type,\n",
"# what was the participant's outcome?\n",
"\n",
"# always_better: good outcome whether or not treatment was taken.\n",
"# helped: good outcome only if treatment was taken.\n",
"# hurt: good outcome only if treatment was NOT taken (the mirror image of\n",
"#   'helped'); without this clause 'hurt' would be indistinguishable\n",
"#   from 'never_better'.\n",
"# never_better: matches none of the clauses below, so never a good outcome.\n",
"df['good_outcome'] = (\n",
"    (df.response_type == 'always_better')\n",
"    | ((df.response_type == 'helped') & df.took_treatment)\n",
"    | ((df.response_type == 'hurt') & ~df.took_treatment)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>probabilities</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>treatment/untreated/bad</th>\n",
" <td>0.278</td>\n",
" </tr>\n",
" <tr>\n",
" <th>treatment/untreated/good</th>\n",
" <td>0.174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>treatment/treated/bad</th>\n",
" <td>0.344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>treatment/treated/good</th>\n",
" <td>0.204</td>\n",
" </tr>\n",
" <tr>\n",
" <th>control/untreated/bad</th>\n",
" <td>0.336</td>\n",
" </tr>\n",
" <tr>\n",
" <th>control/untreated/good</th>\n",
" <td>0.182</td>\n",
" </tr>\n",
" <tr>\n",
" <th>control/treated/bad</th>\n",
" <td>0.316</td>\n",
" </tr>\n",
" <tr>\n",
" <th>control/treated/good</th>\n",
" <td>0.166</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" probabilities\n",
"treatment/untreated/bad 0.278\n",
"treatment/untreated/good 0.174\n",
"treatment/treated/bad 0.344\n",
"treatment/treated/good 0.204\n",
"control/untreated/bad 0.336\n",
"control/untreated/good 0.182\n",
"control/treated/bad 0.316\n",
"control/treated/good 0.166"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Observed distribution: for each assignment group, the fraction of that\n",
"# group's participants showing each (took_treatment, good_outcome) pair.\n",
"# Fractions are taken within a group, so each group's four rows sum to 1.\n",
"\n",
"# every state is one pick from each binary choice: assignment group,\n",
"# took_treatment False/True, good_outcome False/True\n",
"states = product(['treatment','control'],[False, True], [False, True])\n",
"\n",
"# walk the states in order, filling in one labelled fraction per state\n",
"p_states = {}\n",
"for assignment, treated, outcome in states:\n",
"    group = df[df.assignment == assignment]\n",
"    treated_label = 'treated' if treated == 1 else 'untreated'\n",
"    outcome_label = 'good' if outcome == 1 else 'bad'\n",
"    matches = (group.took_treatment == treated) & (group.good_outcome == outcome)\n",
"    p_states[f'{assignment}/{treated_label}/{outcome_label}'] = matches.mean()\n",
"\n",
"# display:\n",
"pd.DataFrame(p_states, index=['probabilities']).T"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment