Skip to content

Instantly share code, notes, and snippets.

@fgregg
Created March 9, 2015 21:12
Show Gist options
  • Save fgregg/5dab0930e71fa2612837 to your computer and use it in GitHub Desktop.
Save fgregg/5dab0930e71fa2612837 to your computer and use it in GitHub Desktop.
Path model with logistic regression.
pums <- read.csv("small_pums.csv")
pums$ESR <- factor(pums$ESR,
labels=c("civilian employed, at work",
"civilian employed, with a job but not at work",
"unemployed",
"armed forces, at work",
"not in labor force"))
pums$college <- factor(pums$SCHL > 17, labels=c("no college", "some college"))
# Show the counts of each occupation code
table(factor((pums$OCCP12)))
# Calculate the mean wage for each occupation code
average_wages <- aggregate(WAGP ~ OCCP12, pums, mean)
# We want to split occupation into high and low income levels. We'll
# find the median value for the *average* occupation wage
median_occupation_wage <- median(average_wages$WAGP[average_wages$WAGP > 0])
top_occupations <- average_wages[average_wages$WAGP > median_occupation_wage,]
bottom_occupations <- average_wages[average_wages$WAGP <= median_occupation_wage,]
# create our binary occupation level variable
pums$occupation_level <- pums$OCCP12 %in% top_occupations$OCCP12
# Now, for each field of degree, we find the percentage of people who
# end up in a high income level occupation (the means of 1's and 0's is
# the proportion of 1's)
feeder_degrees <- aggregate(occupation_level ~ FOD1P, pums, mean)
# if at least 80% of of people with a certain degree end up in a high
# income occupation, we'll code that degree as a higher feeder
pums$feeder_level <-pums$FOD1P %in% feeder_degrees[feeder_degrees$occupation_level >= 0.8, "FOD1P"]
# keep NAs
pums$feeder_level[is.na(pums$FOD1P)] <- NA
# Give our variable meaningful names
pums$occupation_level <- factor(pums$occupation_level, labels=c("low", "high"))
pums$feeder_level <- factor(pums$feeder_level, labels=c("low", "high"))
model.1 <- lm(WAGP ~ SEX + occupation_level,
data=pums[pums$ESR=="civilian employed, at work",])
model.2 <- glm(occupation_level ~ SEX + feeder_level,
family=binomial,
data=pums[pums$ESR=="civilian employed, at work",])
model.3 <- glm(feeder_level~ SEX,
family=binomial,
data=pums[pums$ESR=="civilian employed, at work",])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment