Created
March 9, 2015 21:12
-
-
Save fgregg/5dab0930e71fa2612837 to your computer and use it in GitHub Desktop.
Path model with logistic regression.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pums <- read.csv("small_pums.csv") | |
pums$ESR <- factor(pums$ESR, | |
labels=c("civilian employed, at work", | |
"civilian employed, with a job but not at work", | |
"unemployed", | |
"armed forces, at work", | |
"not in labor force")) | |
pums$college <- factor(pums$SCHL > 17, labels=c("no college", "some college")) | |
# Show the counts of each occupation code | |
table(factor((pums$OCCP12))) | |
# Calculate the mean wage for each occupation code | |
average_wages <- aggregate(WAGP ~ OCCP12, pums, mean) | |
# We want to split occupation into high and low income levels. We'll | |
# find the median value for the *average* occupation wage | |
median_occupation_wage <- median(average_wages$WAGP[average_wages$WAGP > 0]) | |
top_occupations <- average_wages[average_wages$WAGP > median_occupation_wage,] | |
bottom_occupations <- average_wages[average_wages$WAGP <= median_occupation_wage,] | |
# create our binary occupation level variable | |
pums$occupation_level <- pums$OCCP12 %in% top_occupations$OCCP12 | |
# Now, for each field of degree, we find the percentage of people who | |
# end up in a high income level occupation (the means of 1's and 0's is | |
# the proportion of 1's) | |
feeder_degrees <- aggregate(occupation_level ~ FOD1P, pums, mean) | |
# if at least 80% of of people with a certain degree end up in a high | |
# income occupation, we'll code that degree as a higher feeder | |
pums$feeder_level <-pums$FOD1P %in% feeder_degrees[feeder_degrees$occupation_level >= 0.8, "FOD1P"] | |
# keep NAs | |
pums$feeder_level[is.na(pums$FOD1P)] <- NA | |
# Give our variable meaningful names | |
pums$occupation_level <- factor(pums$occupation_level, labels=c("low", "high")) | |
pums$feeder_level <- factor(pums$feeder_level, labels=c("low", "high")) | |
model.1 <- lm(WAGP ~ SEX + occupation_level, | |
data=pums[pums$ESR=="civilian employed, at work",]) | |
model.2 <- glm(occupation_level ~ SEX + feeder_level, | |
family=binomial, | |
data=pums[pums$ESR=="civilian employed, at work",]) | |
model.3 <- glm(feeder_level~ SEX, | |
family=binomial, | |
data=pums[pums$ESR=="civilian employed, at work",]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment