Skip to content

Instantly share code, notes, and snippets.

@tvladeck
Created March 18, 2019 16:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tvladeck/cb49a41c167eac12264efd909c2c4eeb to your computer and use it in GitHub Desktop.
Save tvladeck/cb49a41c167eac12264efd909c2c4eeb to your computer and use it in GitHub Desktop.
income inequality analysis
# from IPUMS
# includes 1950 forward
# includes individual income
# sex
# age
raw_data <- data.table::fread("~/Downloads/usa_00008.csv")
library(tidyverse)
includes_income <- raw_data %>%
filter(!is.na(INCTOT)) %>%
filter(INCTOT != 9999999) %>%
filter(INCTOT != -000001) %>%
filter(INCTOT != -009995)
# http://www.in2013dollars.com/us/inflation/1950
adjusted_income <-
includes_income %>%
mutate(
ADJINC = case_when(
YEAR == 1950 ~ INCTOT * 10.49,
YEAR == 1960 ~ INCTOT * 8.54,
YEAR == 1970 ~ INCTOT * 6.51,
YEAR == 1980 ~ INCTOT * 3.07,
YEAR == 1990 ~ INCTOT * 1.93,
YEAR == 2000 ~ INCTOT * 1.47,
YEAR == 2010 ~ INCTOT * 1.16,
YEAR == 2017 ~ INCTOT * 1.03
)
) %>%
mutate(ADJYEAR = YEAR - 1950)
library(mgcv)
library(qgam)
mod_05 <- qgam(ADJINC ~ s(YEAR, k = 4) + s(AGE) + factor(SEX), # weight
data = sample_n(adjusted_income, 10000, weight = PERWT, replace = T), qu = 0.05)
mod_10 <- qgam(ADJINC ~ s(YEAR, k = 4) + s(AGE) + factor(SEX), # weight
data = sample_n(adjusted_income, 10000, weight = PERWT, replace = T), qu = 0.1)
mod_50 <- qgam(ADJINC ~ s(YEAR, k = 4) + s(AGE) + factor(SEX), # weight
data = sample_n(adjusted_income, 10000, weight = PERWT, replace = T), qu = 0.5)
mod_90 <- qgam(ADJINC ~ s(YEAR, k = 4) + s(AGE) + factor(SEX), # weight
data = sample_n(adjusted_income, 10000, weight = PERWT, replace = T), qu = 0.9)
mod_95 <- qgam(ADJINC ~ s(YEAR, k = 4) + s(AGE) + factor(SEX), # weight
data = sample_n(adjusted_income, 10000, weight = PERWT, replace = T), qu = 0.95)
par(mfrow = c(1,3))
mod_10 %>% plot(scale = 0, select = 1, main = "10th percentile", ylab = "INCOME")
mod_50 %>% plot(scale = 0, select = 1, main = "50th percentile", ylab = "")
mod_90 %>% plot(scale = 0, select = 1, main = "90th percentile", ylab = "")
par(mfrow = c(1,1))
par(mfrow = c(1,2))
mod_05 %>% plot(scale = 0, select = 1, main = "5th percentile", ylab = "INCOME")
mod_95 %>% plot(scale = 0, select = 1, main = "95th percentile", ylab = "")
par(mfrow = c(1,1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment