Skip to content

Instantly share code, notes, and snippets.

@hughjonesd
Last active November 11, 2023 13:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hughjonesd/69bdc9379df053d092b66302276fa964 to your computer and use it in GitHub Desktop.
Save hughjonesd/69bdc9379df053d092b66302276fa964 to your computer and use it in GitHub Desktop.
Looking at birth order in Greg Clark's sample
# R code for https://open.substack.com/pub/wyclif/p/does-birth-order-matter
library(openxlsx)
library(tidyverse)
library(fixest)
library(huxtable)
# get the data from the PNAS article
vars1 <- openxlsx::read.xlsx("pnas.2300926120.sd01.xlsx", sheet = 1)
sd1 <- openxlsx::read.xlsx("pnas.2300926120.sd01.xlsx", sheet = 2)
sd1 <- as_tibble(sd1)
sd1 <- distinct(sd1, pid, .keep_all = TRUE) # remove 19 dupes
sd_sibs <- sd1 |>
filter(! is.na(pid_fath), ! is.na(pid_moth)) |>
arrange(byr) |>
mutate(
n_sibs = n(),
birth_order = min_rank(byr),
first_child = birth_order == 1,
.by = c(pid_fath, pid_moth) # within sib-groups with the same parents
) |>
filter(n_sibs <= 6)
sd_sibs <- sd_sibs |>
mutate(
n_wives_fath = n_distinct(pid_moth),
.by = pid_fath
) |>
filter(
n_wives_fath <= 2 # one guy with 1291 wives...
)
table(`N siblings` = sd_sibs$n_sibs, `Birth order` = sd_sibs$birth_order) |>
prop.table(1) |>
round(2)
mod <- list()
# higher education
mod$ded <- feols(ded ~ birth_order + byr | pid_fath^pid_moth, data = sd_sibs)
# literacy at marriage
mod$lit <- feols(lit ~ birth_order + byr | pid_fath^pid_moth, data = sd_sibs)
# occupational status
mod$occ <- feols(occ ~ birth_order + byr | pid_fath^pid_moth, data = sd_sibs)
# log house value
mod$lhv <- feols(lhv ~ birth_order + byr | pid_fath^pid_moth, data = sd_sibs)
# index of multiple deprivation 2019
mod$imd <- feols(imd ~ birth_order + byr | pid_fath^pid_moth, data = sd_sibs)
# table 1
huxreg(mod,
statistics = c(N = "nobs", "R2" = "r.squared", "Within R2" = "within.r.squared"))
# in the pooled regressions, we need to calculate father's age explicitly
sd1$pid <- as.character(sd1$pid)
sd_sibs <- left_join(sd_sibs,
sd1 |> select(pid, byrf = byr),
by = c("pid_fath" = "pid"))
sd_sibs$fath_age_birth <- sd_sibs$byr - sd_sibs$byrf
mod_pool <- list()
mod_pool$"Higher ed" <- feols(ded ~ birth_order + fath_age_birth | n_sibs, data = sd_sibs, vcov = cluster ~ pid_fath+pid_moth)
mod_pool$"Literacy" <- feols(lit ~ birth_order + fath_age_birth | n_sibs, data = sd_sibs, vcov = cluster ~ pid_fath+pid_moth)
mod_pool$"Occ. status" <- feols(occ ~ birth_order + fath_age_birth | n_sibs, data = sd_sibs, vcov = cluster ~ pid_fath+pid_moth)
mod_pool$"House value" <- feols(lhv ~ birth_order + fath_age_birth | n_sibs, data = sd_sibs, vcov = cluster ~ pid_fath+pid_moth)
mod_pool$"IMD 2019" <- feols(imd ~ birth_order + fath_age_birth | n_sibs, data = sd_sibs, vcov = cluster ~ pid_fath+pid_moth)
# table 2
huxreg(mod_pool, statistics = c(N = "nobs", R2 = "r.squared", "Within R2" = "within.r.squared"))
# other stuff not in the post
# dummy for first child only
mod_1st <- list()
mod_1st[["ded"]] <- feols(ded ~ first_child + byr | pid_fath^pid_moth, data = sd_sibs)
mod_1st[["lit"]] <- feols(lit ~ first_child + byr | pid_fath^pid_moth, data = sd_sibs)
mod_1st[["occ"]] <- feols(occ ~ first_child + byr | pid_fath^pid_moth, data = sd_sibs)
mod_1st[["lhv"]] <- feols(lhv ~ first_child + byr | pid_fath^pid_moth, data = sd_sibs)
mod_1st[["imd"]] <- feols(imd ~ first_child + byr | pid_fath^pid_moth, data = sd_sibs)
# without controlling for parental age
mod_simple <- list()
mod_simple[["ded"]] <- feols(ded ~ birth_order | pid_fath^pid_moth, data = sd_sibs)
mod_simple[["lit"]] <- feols(lit ~ birth_order | pid_fath^pid_moth, data = sd_sibs)
mod_simple[["occ"]] <- feols(occ ~ birth_order | pid_fath^pid_moth, data = sd_sibs)
mod_simple[["lhv"]] <- feols(lhv ~ birth_order | pid_fath^pid_moth, data = sd_sibs)
mod_simple[["imd"]] <- feols(imd ~ birth_order | pid_fath^pid_moth, data = sd_sibs)
cor(demean(fath_age_birth ~ n_sibs, data = sd_sibs, na.rm = FALSE),
demean(birth_order ~ n_sibs, data = sd_sibs, na.rm = FALSE),
use = "complete")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment