Instantly share code, notes, and snippets.

# robbrit/Degree Values - Code Created Feb 21, 2011

What would you like to do?
 # Compare inflation-adjusted employment income of university graduates in the
# 1986 and 2006 census samples, broken down by major field of study.

data2006 <- read.csv("census2006.csv", header = TRUE)
data1986 <- read.csv("census1986.csv", header = TRUE)

# Get the CPI for 1986, 2006, and 2010
cpi2006 <- 1.091
cpi1986 <- 0.656
cpi2010 <- 1.165

# Summarise one income vector: mean, median, standard deviation, and the
# standard errors of the mean and of the median.
summarise_income <- function(income) {
  se_mean <- sd(income) / sqrt(length(income))
  list(
    mean = mean(income),
    median = median(income),
    sd = sd(income),
    se_mean = se_mean,
    # standard error for median is 1.253 * standard error for mean
    # NOTE(review): this factor is the large-sample normal approximation;
    # income data is typically skewed, so treat the median interval as rough.
    se_median = 1.253 * se_mean
  )
}

# Write a 30-break histogram of `values` to the PNG file `path`.
# `label` is used for the title and x axis so the plot reads the same as a
# direct `hist(inc1986, ...)` call would.
save_histogram <- function(values, path, label) {
  png(path)
  # Close the device even if hist() fails, so no graphics device is leaked.
  on.exit(dev.off(), add = TRUE)
  hist(
    values,
    breaks = 30,
    col = "lightblue",
    main = paste("Histogram of", label),
    xlab = label
  )
}

# Print summary statistics and save histograms for one group of graduates.
#
# w1986    - rows of the 1986 sample; income is read from column `wagesp`
# w2006    - rows of the 2006 sample; income is read from column `empin`
# message  - heading printed before the statistics
# png_file - sprintf() pattern containing one %d, filled with 1986 and 2006
#            to build the two histogram file names
#
# Incomes are converted to 2010 dollars using the CPI values defined above.
# Called for its side effects (printing, PNG files); returns NULL invisibly.
calc_avg <- function(w1986, w2006, message, png_file) {
  # scale by CPIs to give 2010 dollars
  inc2006 <- w2006$empin / cpi2006 * cpi2010
  inc1986 <- w1986$wagesp / cpi1986 * cpi2010

  print(message)

  # An empty sample would give NaN statistics and make hist() fail.
  if (length(inc1986) == 0 || length(inc2006) == 0) {
    print("No observations in at least one sample; skipping.")
    return(invisible(NULL))
  }

  # get the mean and standard error to create a confidence interval for each
  # income - at this sample size, probably won't have a big interval
  stats1986 <- summarise_income(inc1986)
  stats2006 <- summarise_income(inc2006)

  z <- 1.96 # 95% confidence (use 1.645 for 90%, 1.282 for 80%)

  print(sprintf(
    "Confidence for 1986: %f to %f",
    stats1986$mean - z * stats1986$se_mean,
    stats1986$mean + z * stats1986$se_mean
  ))
  print(sprintf(
    "Confidence for 2006: %f to %f",
    stats2006$mean - z * stats2006$se_mean,
    stats2006$mean + z * stats2006$se_mean
  ))
  print(sprintf("Standard Deviation for 1986: %f", stats1986$sd))
  print(sprintf("Standard Deviation for 2006: %f", stats2006$sd))
  # Both bounds use the median's standard error, so the interval is symmetric
  # about the median.
  print(sprintf(
    "Median Confidence for 1986: %f to %f",
    stats1986$median - z * stats1986$se_median,
    stats1986$median + z * stats1986$se_median
  ))
  print(sprintf(
    "Median Confidence for 2006: %f to %f",
    stats2006$median - z * stats2006$se_median,
    stats2006$median + z * stats2006$se_median
  ))

  # output some histograms
  save_histogram(inc1986, sprintf(png_file, 1986), "inc1986")
  save_histogram(inc2006, sprintf(png_file, 2006), "inc2006")

  invisible(NULL)
}

# filters:
#  - people who are employed in a regular job (no self-employment)
#    - this means COW (class of worker) is 4 for 2006, 1 for 1986
#  - employment income number must be available
#  - people who are at least 15 years old
#    - 2006: age group is at least 6 (15-17 year olds) and not 88 (unavailable)
#    - 1986: just gives raw age
#  - have a university degree
# NOTE(review): the 1986 filter does not exclude any "unavailable" income
# codes in `wagesp` - confirm against the 1986 codebook whether it should.
bac2006 <- data2006[which(
  data2006$cow == 4 &
    data2006$empin != 9999999 &
    data2006$empin != 8888888 &
    data2006$agegrp >= 6 &
    data2006$agegrp != 88 &
    data2006$hdgree >= 9 &
    data2006$hdgree <= 13
), ]
bac1986 <- data1986[which(
  data1986$cowp == 1 &
    data1986$agep >= 15 &
    data1986$hlosp == 11
), ]

# Each entry: field name, 1986 field-of-study code (`dgmfs`),
# 2006 field-of-study code (`mfs`).
groups <- list(
  list("education", 1, 1),
  list("fine arts", 2, 2),
  list("humanities", 3, 3),
  list("social sciences", 4, 4),
  list("commerce/business", 6, 5),
  list("health/food sciences", 7, 6),
  list("engineering", 8, 7),
  list("sciences", 12, 10)
)

for (group in groups) {
  # `[[` extracts the value itself; `[` would give a one-element list.
  field <- group[[1]]
  code1986 <- group[[2]]
  code2006 <- group[[3]]

  group2006 <- bac2006[which(bac2006$mfs == code2006), ]
  group1986 <- bac1986[which(bac1986$dgmfs == code1986), ]

  # Put the field name in the file name so each group keeps its own
  # histograms instead of overwriting the previous group's files.
  slug <- gsub("[^a-z0-9]+", "_", field)

  calc_avg(
    group1986,
    group2006,
    sprintf("Average wages for degree in %s:", field),
    sprintf("wages_uni_%s_%%d.png", slug)
  )
  print("")
}