Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
data2006 <- read.csv("census2006.csv", header = TRUE)
data1986 <- read.csv("census1986.csv", header = TRUE)
# Get the CPI for 1986, 2006, and 2010
cpi2006 <- 1.091
cpi1986 <- 0.656
cpi2010 <- 1.165
calc_avg <- function(w1986, w2006, message, png_file){
# scale by CPIs to give 2010 dollars
inc2006 <- w2006$empin / cpi2006 * cpi2010
inc1986 <- w1986$wagesp / cpi1986 * cpi2010
# get the mean and standard error to create a confidence interval for each
# income - at this sample size, probably won't have a big interval
mean2006 <- mean(inc2006)
med2006 <- median(inc2006)
se2006 <- sd(inc2006) / sqrt(length(inc2006))
semd2006 <- 1.253 * se2006 # standard error for median is 1.253 * standard error for mean
mean1986 <- mean(inc1986)
med1986 <- median(inc1986)
se1986 <- sd(inc1986) / sqrt(length(inc1986))
semd1986 <- 1.253 * se1986
z <- 1.96 # 95% confidence
#z <- 1.645 # 90% confidence
#z <- 1.282 # 80% confidence
print(message)
print(sprintf("Confidence for 1986: %f to %f", mean1986 - z * se1986, mean1986 + z * se1986))
print(sprintf("Confidence for 2006: %f to %f", mean2006 - z * se2006, mean2006 + z * se2006))
print(sprintf("Standard Deviation for 1986: %f", sd(inc1986)))
print(sprintf("Standard Deviation for 2006: %f", sd(inc2006)))
print(sprintf("Median Confidence for 1986: %f to %f", med1986 - z * semd1986, med1986 + z * se1986))
print(sprintf("Median Confidence for 2006: %f to %f", med2006 - z * semd2006, med2006 + z * se2006))
# output some histograms
png(sprintf(png_file, 1986))
hist(inc1986, breaks = 30, col = "lightblue")
dev.off()
png(sprintf(png_file, 2006))
hist(inc2006, breaks = 30, col = "lightblue")
dev.off()
}
# filters:
# - people who are employed in a regular job (no self-employment)
# - this means COW (class of worker) is 4 for 2006, 1 for 1986
# - employment income number must be available
# - people who are at least 15 years old
# - 2006: age group is at least 6 (15-17 year olds) and not 88 (unavailable)
# - 1986: just gives raw age
# - have a university degree
bac2006 <- data2006[which(data2006$cow == 4 & data2006$empin != 9999999 & data2006$empin != 8888888
& data2006$agegrp >= 6 & data2006$agegrp != 88 & data2006$hdgree >= 9 & data2006$hdgree <= 13),]
bac1986 <- data1986[which(data1986$cowp == 1 & data1986$agep >= 15 & data1986$hlosp == 11),]
groups <- list(
list("education", 1, 1),
list("fine arts", 2, 2),
list("humanities", 3, 3),
list("social sciences", 4, 4),
list("commerce/business", 6, 5),
list("health/food sciences", 7, 6),
list("engineering", 8, 7),
list("sciences", 12, 10)
)
for (group in groups){
group2006 <- bac2006[which(bac2006$mfs == group[3]),]
group1986 <- bac1986[which(bac1986$dgmfs == group[2]),]
calc_avg(group1986, group2006, sprintf("Average wages for degree in %s:", group[1]), "wages_uni%d.png")
print("")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment