Skip to content

Instantly share code, notes, and snippets.

@andrewheiss
Created November 2, 2019 01:48
Show Gist options
  • Save andrewheiss/a80dbd86977eaa05f38894c2329f79b3 to your computer and use it in GitHub Desktop.
Save andrewheiss/a80dbd86977eaa05f38894c2329f79b3 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(wakefield)
library(rdrobust)
library(rddensity)
library(broom)
library(huxtable)

# Make fake data
set.seed(1234)
df <- r_data_frame(
  n = 1000,
  id,
  attendance = rbeta(shape1 = 7, shape2 = 2)
) %>% 
  mutate(attendance = rescale(attendance, to = c(20, 100))) %>% 
  mutate(treatment = attendance < 80) %>% 
  mutate(grade = (200 * treatment) + (20 * attendance) + rnorm(1000, 600, 100)) %>% 
  mutate(grade = rescale(grade, to = c(0, 100))) 

# Is this sharp or fuzzy? (It's sharp because I made it that way)
ggplot(df, aes(x = attendance, y = treatment, color = treatment)) +
  geom_point(size = 0.5, alpha = 0.5) +
  guides(color = FALSE) +
  labs(x = "% attendance", y = "Received treatment") +
  theme_minimal()

# Distribution of running variable looks okay and discontinuity-free
rddensity(df$attendance, c = 80) %>% summary()
#> 
#> RD Manipulation Test using local polynomial density estimation.
#> 
#> Number of obs =       1000
#> Model =               unrestricted
#> Kernel =              triangular
#> BW method =           comb
#> VCE method =          jackknife
#> 
#> Cutoff c = 80         Left of c           Right of c          
#> Number of obs         553                 447                 
#> Eff. Number of obs    263                 284                 
#> Order est. (p)        2                   2                   
#> Order bias (q)        3                   3                   
#> BW est. (h)           10.783              9.638               
#> 
#> Method                T                   P > |T|             
#> Robust                0.3687              0.7123
rdplotdensity(rddensity(df$attendance, c = 80), df$attendance)

#> $Estl
#> Call: lpdensity
#> 
#> Sample size                                      553
#> Polynomial order for point estimation    (p=)    2
#> Order of derivative estimated            (v=)    1
#> Polynomial order for confidence interval (q=)    3
#> Kernel function                                  triangular
#> Scaling factor                                   0.552552552552553
#> Bandwidth method                                 user provided
#> 
#> Use summary(...) to show estimates.
#> 
#> $Estr
#> Call: lpdensity
#> 
#> Sample size                                      447
#> Polynomial order for point estimation    (p=)    2
#> Order of derivative estimated            (v=)    1
#> Polynomial order for confidence interval (q=)    3
#> Kernel function                                  triangular
#> Scaling factor                                   0.446446446446446
#> Bandwidth method                                 user provided
#> 
#> Use summary(...) to show estimates.
#> 
#> $Estplot

# Plot outcome and running variable
ggplot(df, aes(x = attendance, y = grade, color = treatment)) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(data = filter(df, attendance < 80), method = "lm") +
  geom_smooth(data = filter(df, attendance >= 80), method = "lm") + 
  geom_vline(xintercept = 80) +
  guides(color = FALSE) +
  labs(x = "% attendance", y = "% final grade") +
  theme_minimal()

# Non-parametric RD with different kernels
rdrobust(df$grade, df$attendance, c = 80, kernel = "triangular") %>% summary()  # Default
#> Call: rdrobust
#> 
#> Number of Obs.                 1000
#> BW type                       mserd
#> Kernel                   Triangular
#> VCE method                       NN
#> 
#> Number of Obs.                 553         447
#> Eff. Number of Obs.            131         155
#> Order est. (p)                   1           1
#> Order bias  (p)                  2           2
#> BW est. (h)                  5.033       5.033
#> BW bias (b)                  8.541       8.541
#> rho (h/b)                    0.589       0.589
#> 
#> =============================================================================
#>         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
#> =============================================================================
#>   Conventional   -13.644     1.402    -9.734     0.000   [-16.391 , -10.896]   
#>         Robust         -         -    -8.783     0.000   [-17.321 , -11.001]   
#> =============================================================================
rdrobust(df$grade, df$attendance, c = 80, kernel = "epanechnikov") %>% summary()
#> Call: rdrobust
#> 
#> Number of Obs.                 1000
#> BW type                       mserd
#> Kernel                   Epanechnikov
#> VCE method                       NN
#> 
#> Number of Obs.                 553         447
#> Eff. Number of Obs.            137         159
#> Order est. (p)                   1           1
#> Order bias  (p)                  2           2
#> BW est. (h)                  5.247       5.247
#> BW bias (b)                  8.801       8.801
#> rho (h/b)                    0.596       0.596
#> 
#> =============================================================================
#>         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
#> =============================================================================
#>   Conventional   -13.307     1.370    -9.712     0.000   [-15.992 , -10.622]   
#>         Robust         -         -    -8.577     0.000   [-16.829 , -10.568]   
#> =============================================================================
rdrobust(df$grade, df$attendance, c = 80, kernel = "uniform") %>% summary()
#> Call: rdrobust
#> 
#> Number of Obs.                 1000
#> BW type                       mserd
#> Kernel                      Uniform
#> VCE method                       NN
#> 
#> Number of Obs.                 553         447
#> Eff. Number of Obs.            178         206
#> Order est. (p)                   1           1
#> Order bias  (p)                  2           2
#> BW est. (h)                  6.915       6.915
#> BW bias (b)                 11.902      11.902
#> rho (h/b)                    0.581       0.581
#> 
#> =============================================================================
#>         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
#> =============================================================================
#>   Conventional   -12.252     1.187   -10.321     0.000   [-14.578 , -9.925]    
#>         Robust         -         -    -8.866     0.000   [-15.186 , -9.688]    
#> =============================================================================

# Check potential data-driven bandwith options
# This says +/- 5ish
rdbwselect(df$grade, df$attendance, c = 80) %>% summary()
#> Call: rdbwselect
#> 
#> Number of Obs.                 1000
#> BW type                       mserd
#> Kernel                   Triangular
#> VCE method                       NN
#> 
#> Number of Obs.                 553         447
#> Order est. (p)                   1           1
#> Order bias  (q)                  2           2
#> 
#> =======================================================
#>                   BW est. (h)    BW bias (b)
#>             Left of c Right of c  Left of c Right of c
#> =======================================================
#>      mserd     5.033      5.033      8.541      8.541
#> =======================================================

# Check best bandwidth + half bandwidth + twice bandwidth
rdrobust(df$grade, df$attendance, c = 80, h = 5.03) %>% summary()
#> Call: rdrobust
#> 
#> Number of Obs.                 1000
#> BW type                      Manual
#> Kernel                   Triangular
#> VCE method                       NN
#> 
#> Number of Obs.                 553         447
#> Eff. Number of Obs.            131         155
#> Order est. (p)                   1           1
#> Order bias  (p)                  2           2
#> BW est. (h)                  5.030       5.030
#> BW bias (b)                  5.030       5.030
#> rho (h/b)                    1.000       1.000
#> 
#> =============================================================================
#>         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
#> =============================================================================
#>   Conventional   -13.645     1.402    -9.732     0.000   [-16.393 , -10.897]   
#>         Robust         -         -    -8.123     0.000   [-18.606 , -11.372]   
#> =============================================================================
rdrobust(df$grade, df$attendance, c = 80, h = 5.03 / 2) %>% summary()
#> Call: rdrobust
#> 
#> Number of Obs.                 1000
#> BW type                      Manual
#> Kernel                   Triangular
#> VCE method                       NN
#> 
#> Number of Obs.                 553         447
#> Eff. Number of Obs.             61          78
#> Order est. (p)                   1           1
#> Order bias  (p)                  2           2
#> BW est. (h)                  2.515       2.515
#> BW bias (b)                  2.515       2.515
#> rho (h/b)                    1.000       1.000
#> 
#> =============================================================================
#>         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
#> =============================================================================
#>   Conventional   -15.155     1.744    -8.692     0.000   [-18.572 , -11.738]   
#>         Robust         -         -    -9.484     0.000   [-21.080 , -13.859]   
#> =============================================================================
rdrobust(df$grade, df$attendance, c = 80, h = 5.03 * 2) %>% summary()
#> Call: rdrobust
#> 
#> Number of Obs.                 1000
#> BW type                      Manual
#> Kernel                   Triangular
#> VCE method                       NN
#> 
#> Number of Obs.                 553         447
#> Eff. Number of Obs.            251         293
#> Order est. (p)                   1           1
#> Order bias  (p)                  2           2
#> BW est. (h)                 10.060      10.060
#> BW bias (b)                 10.060      10.060
#> rho (h/b)                    1.000       1.000
#> 
#> =============================================================================
#>         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
#> =============================================================================
#>   Conventional   -12.432     1.062   -11.705     0.000   [-14.514 , -10.351]   
#>         Robust         -         -    -8.752     0.000   [-15.819 , -10.030]   
#> =============================================================================

# Plot this (though not with ggplot ::sadface::)
rdplot(df$grade, df$attendance, c = 80)

# Parametric estimation
# If we center the variable it's easier to interpret the attendance coefficient
df <- df %>% 
  mutate(attendance_centered = attendance - 80)

# Regular regression
model1 <- lm(grade ~ attendance_centered + treatment, data = df)

# Narrower bandwidth with regular regression
model2 <- lm(grade ~ attendance + treatment, 
             data = filter(df, attendance_centered < 5, attendance_centered > -5))

# Polynomial regression
model3 <- lm(grade ~ attendance_centered + I(attendance_centered^2) + treatment, data = df)
model4 <- lm(grade ~ attendance_centered + I(attendance_centered^2) + 
               I(attendance_centered^3) + treatment, data = df)

# All together
huxreg(model1, model2, model3, model4)
─────────────────────────────────────────────────────────────────────────────────────
                            (1)            (2)             (3)             (4)       
                     ────────────────────────────────────────────────────────────────
  (Intercept)             65.839 ***    -48.349 *        65.822 ***      66.023 ***  
                          (0.324)       (19.146)         (0.438)         (0.452)     
  attendance_centere       1.163 ***                      1.165 ***       1.174 ***  
  d                                                                                  
                          (0.020)                        (0.035)         (0.036)     
  treatmentTRUE           11.574 ***     12.781 ***      11.597 ***      11.726 ***  
                          (0.589)        (1.342)         (0.725)         (0.728)     
  attendance                              1.421 ***                                  
                                         (0.232)                                     
  I(attendance_cente                                      0.000          -0.002      
  red^2)                                                                             
                                                         (0.001)         (0.002)     
  I(attendance_cente                                                     -0.000      
  red^3)                                                                             
                                                                         (0.000)     
                     ────────────────────────────────────────────────────────────────
  N                     1000            286            1000            1000          
  R2                       0.823          0.279           0.823           0.824      
  logLik               -3201.077       -897.957       -3201.075       -3199.508      
  AIC                   6410.153       1803.915        6412.150        6411.016      
─────────────────────────────────────────────────────────────────────────────────────
  *** p < 0.001; ** p < 0.01; * p < 0.05.                                            
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment