Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Selection simulations
// Selection bias for normally-distributed effect with choice
// Setup
qui {
clear all
cls
version 13
// Reproducibility with bit.ly/stata-random
set seed 422698 // Timestamp: 2021-01-22 16:41:57 UTC
// Request parameter values
cap prog drop selection_setup
prog def selection_setup
di in smcl `"{ red : Welcome to the selection bias simulation program.}"'
di in smcl `"{ red : Please enter the requested parameter values:}"'
di `"Enter number of observations (integer)"' _request(n)
di `"Enter true effect size (real)"' _request(t)
di `"Enter number of iterations (integer)"' _request(it)
end
}
// Request parameters
selection_setup
qui {
// Create simulation program
cap prog drop selection
prog def selection , rclass // Allow statistics output
qui {
syntax , [pause] [exit] [clear] // Allow pausing and breaking
if "`exit'" == "" preserve // Reset Stata state after finishing program run
// Set parameters for simulation
local n = ${n} // Number of observations
local t = ${t} // True β of treatment effect
local v = 1 // True standard deviation of treatment effect
local s = 1 // True standard deviation of potential outcomes
// Potential outcomes
`clear'
set obs `n' // Create simulation observations
gen id = _n // ID variable
lab var id "Unique ID"
gen y_0 = rnormal(0,`s') // Potential outcome 0: untreated
lab var y_0 "Untreated Potential Outcome"
gen y_1 = y_0 + rnormal(`t',`v') // Potential outcome 1: treated
lab var y_1 "Treated Potential Outcome"
// Allow choice if positive returns
gen return = y_1 - y_0
lab var return "Individual Gain to Education"
gen education = (return > 0) // Choose education
lab var education "Educated"
gen y = y_0 // Observed outcome
replace y = y_1 if (education == 1)
lab var y "Observed Outcome"
// Create randomization
gen y_rand = y_0 // Observed outcome under randomization
gen education_rand = (rnormal() > 0)
lab var education_rand "Randomized Treatment"
replace y_rand = y_1 if (education_rand == 1)
lab var y_rand "Randomized Observed Outcome"
// Regression time
sum return // Investigate true β
return scalar true = `r(mean)'
reg y education // Investigate observed β
mat results = r(table) // Get regression results
local beta = results[1,1]
local pval = results[4,1]
// Push results to main memory
return scalar sigb = (`pval' < 0.05)
return scalar beta = `beta'
return matrix results = results
// Regression time (randomization)
reg y_rand education_rand // Investigate observed β
mat results = r(table) // Get regression results
local beta_rand = results[1,1]
local pval_rand = results[4,1]
// Push results to main memory
return scalar sigb_rand = (`pval_rand' < 0.05)
return scalar beta_rand = `beta_rand'
return matrix results_rand = results
// Pause or here if we want to see microdata
pause on
`pause' // Type "BREAK" in console to quit, "q" to continue
`exit' // Ends here if [exit] option called
}
end
// Multiple simulations
clear
tempfile sims
}
simulate r(true) r(beta) r(beta_rand) r(sigb) r(sigb_rand) ///
, reps(${it}) saving(`sims') seed(858488) /// Timestamp: 2021-01-22 17:25:50 UTC
: selection , clear
qui {
// Set up graphing
noi di in smcl `"{ red : Compiling results visualization...}"' _newline _newline
use `sims' , clear
qui su _sim_2
local m2 = r(min)
qui su _sim_3
local m3 = r(min)
local min = min(`m2',`m3')
// Visualize
tw ///
(histogram _sim_2 , w(0.05) lc(none) fc(red%75) barwidth(0.04) start(`min') freq) ///
(histogram _sim_2 if (_sim_4 == 1), w(0.05) lc(black) fc(none) barwidth(0.04) start(`min') freq) ///
(histogram _sim_1 , w(0.05) lc(none) fc(black%50) barwidth(0.04) start(`min') freq) ///
, legend(on order(0 "Effect Size With Choice:" 1 "Estimated Returns" 2 "Significant Estimates" 3 "True Returns") ///
ring(1) pos(12) r(1) symxsize(small) region(lc(none))) ///
nodraw saving(g1.gph , replace)
// Visualize with randomization
tw ///
(histogram _sim_3 , w(0.05) lc(none) fc(red%75) barwidth(0.045) start(`min') freq) ///
(histogram _sim_3 if (_sim_5 == 1), w(0.05) lc(black) fc(none) barwidth(0.045) start(`min') freq) ///
(histogram _sim_1 , w(0.05) lc(none) fc(black%50) barwidth(0.04) start(`min') freq) ///
, legend(on order(0 "Effect Size With Randomization:" 1 "Estimated Returns" 2 "Significant Estimates" 3 "True Returns") ///
ring(1) pos(12) r(1) symxsize(small) region(lc(none))) ///
nodraw saving(g2.gph , replace)
// Graph
graph combine g1.gph g2.gph , c(1) xcom
!rm g1.gph g2.gph
}
// End of dofile
// Selection bias for normally-distributed effect with choice
// Setup
clear
cls
version 13
// Reproducibility with bit.ly/stata-random
set seed 422698 // Timestamp: 2021-01-22 16:41:57 UTC
// Create simulation program
cap prog drop selection
prog def selection , rclass // Allow statistics output
syntax , [pause] [exit] [clear] // Allow pausing and breaking
if "`exit'" == "" preserve // Reset Stata state after finishing program run
// Set parameters for simulation
local n = 1000 // Number of observations
local t = 0 // True β of treatment effect
local v = 1 // True standard deviation of treatment effect
local s = 1 // True standard deviation of potential outcomes
// Potential outcomes
`clear'
set obs `n' // Create simulation observations
gen id = _n // ID variable
lab var id "Unique ID"
gen y_0 = rnormal(0,`s') // Potential outcome 0: untreated
lab var y_0 "Untreated Potential Outcome"
gen y_1 = y_0 + rnormal(`t',`v') // Potential outcome 1: treated
lab var y_1 "Treated Potential Outcome"
// Allow choice if positive returns
gen return = y_1 - y_0
lab var return "Individual Gain to Education"
gen education = (return > 0) // Choose education
lab var education "Educated"
gen y = y_0 // Observed outcome
replace y = y_1 if (education == 1)
lab var y "Observed Outcome"
// Create randomization
gen y_rand = y_0 // Observed outcome under randomization
gen education_rand = (rnormal() > 0)
lab var education_rand "Randomized Treatment"
replace y_rand = y_1 if (education_rand == 1)
lab var y_rand "Randomized Observed Outcome"
// Regression time
sum return // Investigate true β
return scalar true = `r(mean)'
reg y education // Investigate observed β
mat results = r(table) // Get regression results
local beta = results[1,1]
// Push results to main memory
return scalar beta = `beta'
return matrix results = results
// Regression time (randomization)
reg y_rand education_rand // Investigate observed β
mat results = r(table) // Get regression results
local beta_rand = results[1,1]
// Push results to main memory
return scalar beta_rand = `beta_rand'
return matrix results_rand = results
// Pause or here if we want to see microdata
pause on
`pause' // Type "BREAK" in console to quit, "q" to continue
`exit' // Ends here if [exit] option called
end
// Single simulation run
selection
matlist r(results) // View returned estimates
di r(beta) // View estimated beta
di r(true) // View true beta
// Single simulation run with microdata
selection, exit
// Multiple simulations
clear
tempfile sims
simulate r(true) r(beta) r(beta_rand) ///
, reps(100) saving(`sims') seed(858488) /// Timestamp: 2021-01-22 17:25:50 UTC
: selection , clear
use `sims' , clear
// Visualize
tw ///
(histogram _sim_2 , w(0.05) lc(none) fc(red) barwidth(0.04) start(-0.2) freq) ///
(histogram _sim_1 , w(0.05) lc(none) fc(gs12) barwidth(0.04) start(-0.2) freq) ///
, legend(on c(1) order(0 "With Choice:" 1 "Estimated Returns" 2 "True Returns") ring(0) pos(1))
// Visualize with randomization
tw ///
(histogram _sim_3 , w(0.05) lc(none) fc(red%75) barwidth(0.045) start(-0.2) freq) ///
(histogram _sim_1 , w(0.05) lc(none) fc(black%75) barwidth(0.04) start(-0.2) freq) ///
, legend(on c(1) order(0 "With Randomization:" 1 "Estimated Returns" 2 "True Returns") ring(0) pos(1))
// End of dofile
/*
Simulation 2:
All for a more structured wage equation. 
Lets take a more complicated function,
so that your `skills’ are √sθ.
Here, θ is exogenous ability. 
The benefits of being educated w^e=a+√sθ and,
in fact, s itself will depend on θ,
so that w^e=a+√(s(θ)θ). 
So. people choose s to max. w^e
and the costs of education are given by rs,
where r is the interest rate. So, simulation does the following:
Step 1: Draw θ from a distribution
Step 2: Let the user define the interest rate between 0 and 1
Step 3: Assign an optimal s to everyone, which just max. w^e - rs, which implies that s*=θ/(4r^2)
Step 4: Construct a regression by adding some noise
Step 5: Regress w^e on s
Then, modifications and questions for understanding this better could be:
Q1. What would happen if s was allocated randomly?
(problem here is that s is not either 0 or 1,
so perhaps see if we can bound r to make things more sensible)
Q2. Would increasing the sample size solve the problem?
*/
// Start of dofile
// Setup
clear
cls
version 13
// Reproducibility with bit.ly/stata-random
set seed 155966 // Timestamp: 2021-01-22 18:25:30 UTC
set obs 100 // allow x education levels
gen education = _n-1
tsset education
gen r = 0.05 // Interest rate (1=100%)
local n = 5 // number of individuals
local colors = "maroon navy dkgreen black dkorange"
forv i = 1/`n' {
local theta`i' = runiform()
gen payoff`i' = sqrt(education*`theta`i'')
gen margin`i' = d.payoff`i'
gen chosen`i' = margin`i' > r
gen opt`i' = f.chosen`i' != chosen`i' & education != 99
local color : word `i' of `colors'
local graphs = "`graphs' " ///
+ "(line payoff`i' education , lc(`color'))" ///
+ "(line payoff`i' education if chosen`i' == 1 , lw(vthick) lc(`color'))" ///
+ "(scatter payoff`i' education if opt`i' == 1 , mc(black) msize(large))" ///
local graphs2 = "`graphs2' " ///
+ "(line margin`i' education if chosen`i' == 1, lc(`color') lp(dash))" ///
+ "(line margin`i' education if chosen`i' == 0, lc(gs14) lp(dash))"
}
tempfile g1 g2
tw `graphs' ///
, title("Returns") xtit(" ") ///
saving(`g1'.gph, replace) nodraw
tw `graphs2' if education > 10 ///
, title("Marginal Returns") xtit(" ") yline(0.05) ///
saving(`g2'.gph, replace) nodraw ylab(0 "0" 0.05 "R")
graph combine `g1'.gph `g2'.gph , c(1) xcom ysize(7)
// End of dofile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment