fgregg/path_model.R

## path_model.R
pums <- read.csv("small_pums.csv")

pums$ESR <- factor(pums$ESR,
                   labels=c("civilian employed, at work",
                            "civilian employed, with a job but not at work",
                            "unemployed",
                            "armed forces, at work",
                            "not in labor force"))

pums$college <- factor(pums$SCHL > 17, labels=c("no college", "some college"))

# Show the counts of each occupation code
table(factor((pums$OCCP12)))

# Calculate the mean wage for each occupation code
average_wages <- aggregate(WAGP ~ OCCP12, pums, mean)

# We want to split occupation into high and low income levels. We'll
# find the median value for the *average* occupation wage
median_occupation_wage <- median(average_wages$WAGP[average_wages$WAGP > 0])

top_occupations <- average_wages[average_wages$WAGP > median_occupation_wage,]
bottom_occupations <- average_wages[average_wages$WAGP <= median_occupation_wage,]

# create our binary occupation level variable
pums$occupation_level <- pums$OCCP12 %in% top_occupations$OCCP12

# Now, for each field of degree, we find the percentage of people who
# end up in a high income level occupation (the means of 1's and 0's is
# the proportion of 1's)
feeder_degrees <- aggregate(occupation_level ~ FOD1P, pums, mean)

# if at least 80% of of people with a certain degree end up in a high
# income occupation, we'll code that degree as a higher feeder
pums$feeder_level <-pums$FOD1P %in% feeder_degrees[feeder_degrees$occupation_level >= 0.8, "FOD1P"]

# keep NAs
pums$feeder_level[is.na(pums$FOD1P)] <- NA

# Give our variable meaningful names
pums$occupation_level <- factor(pums$occupation_level, labels=c("low", "high"))
pums$feeder_level <- factor(pums$feeder_level, labels=c("low", "high"))


model.1 <- lm(WAGP ~ SEX + occupation_level,
              data=pums[pums$ESR=="civilian employed, at work",])

model.2 <- glm(occupation_level ~ SEX + feeder_level,
               family=binomial,
               data=pums[pums$ESR=="civilian employed, at work",])

model.3 <- glm(feeder_level~ SEX,
               family=binomial,
               data=pums[pums$ESR=="civilian employed, at work",])
	pums <- read.csv("small_pums.csv")

	pums$ESR <- factor(pums$ESR,
	labels=c("civilian employed, at work",
	"civilian employed, with a job but not at work",
	"unemployed",
	"armed forces, at work",
	"not in labor force"))

	pums$college <- factor(pums$SCHL > 17, labels=c("no college", "some college"))

	# Show the counts of each occupation code
	table(factor((pums$OCCP12)))

	# Calculate the mean wage for each occupation code
	average_wages <- aggregate(WAGP ~ OCCP12, pums, mean)

	# We want to split occupation into high and low income levels. We'll
	# find the median value for the average occupation wage
	median_occupation_wage <- median(average_wages$WAGP[average_wages$WAGP > 0])

	top_occupations <- average_wages[average_wages$WAGP > median_occupation_wage,]
	bottom_occupations <- average_wages[average_wages$WAGP <= median_occupation_wage,]

	# create our binary occupation level variable
	pums$occupation_level <- pums$OCCP12 %in% top_occupations$OCCP12

	# Now, for each field of degree, we find the percentage of people who
	# end up in a high income level occupation (the means of 1's and 0's is
	# the proportion of 1's)
	feeder_degrees <- aggregate(occupation_level ~ FOD1P, pums, mean)

	# if at least 80% of of people with a certain degree end up in a high
	# income occupation, we'll code that degree as a higher feeder
	pums$feeder_level <-pums$FOD1P %in% feeder_degrees[feeder_degrees$occupation_level >= 0.8, "FOD1P"]

	# keep NAs
	pums$feeder_level[is.na(pums$FOD1P)] <- NA

	# Give our variable meaningful names
	pums$occupation_level <- factor(pums$occupation_level, labels=c("low", "high"))
	pums$feeder_level <- factor(pums$feeder_level, labels=c("low", "high"))


	model.1 <- lm(WAGP ~ SEX + occupation_level,
	data=pums[pums$ESR=="civilian employed, at work",])

	model.2 <- glm(occupation_level ~ SEX + feeder_level,
	family=binomial,
	data=pums[pums$ESR=="civilian employed, at work",])

	model.3 <- glm(feeder_level~ SEX,
	family=binomial,
	data=pums[pums$ESR=="civilian employed, at work",])