Created
February 21, 2011 00:10
-
-
Save robbrit/836454 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data2006 <- read.csv("census2006.csv", header = TRUE) | |
data1986 <- read.csv("census1986.csv", header = TRUE) | |
# Get the CPI for 1986, 2006, and 2010 | |
cpi2006 <- 1.091 | |
cpi1986 <- 0.656 | |
cpi2010 <- 1.165 | |
calc_avg <- function(w1986, w2006, message, png_file){ | |
# scale by CPIs to give 2010 dollars | |
inc2006 <- w2006$empin / cpi2006 * cpi2010 | |
inc1986 <- w1986$wagesp / cpi1986 * cpi2010 | |
# get the mean and standard error to create a confidence interval for each | |
# income - at this sample size, probably won't have a big interval | |
mean2006 <- mean(inc2006) | |
med2006 <- median(inc2006) | |
se2006 <- sd(inc2006) / sqrt(length(inc2006)) | |
semd2006 <- 1.253 * se2006 # standard error for median is 1.253 * standard error for mean | |
mean1986 <- mean(inc1986) | |
med1986 <- median(inc1986) | |
se1986 <- sd(inc1986) / sqrt(length(inc1986)) | |
semd1986 <- 1.253 * se1986 | |
z <- 1.96 # 95% confidence | |
#z <- 1.645 # 90% confidence | |
#z <- 1.282 # 80% confidence | |
print(message) | |
print(sprintf("Confidence for 1986: %f to %f", mean1986 - z * se1986, mean1986 + z * se1986)) | |
print(sprintf("Confidence for 2006: %f to %f", mean2006 - z * se2006, mean2006 + z * se2006)) | |
print(sprintf("Standard Deviation for 1986: %f", sd(inc1986))) | |
print(sprintf("Standard Deviation for 2006: %f", sd(inc2006))) | |
print(sprintf("Median Confidence for 1986: %f to %f", med1986 - z * semd1986, med1986 + z * se1986)) | |
print(sprintf("Median Confidence for 2006: %f to %f", med2006 - z * semd2006, med2006 + z * se2006)) | |
# output some histograms | |
png(sprintf(png_file, 1986)) | |
hist(inc1986, breaks = 30, col = "lightblue") | |
dev.off() | |
png(sprintf(png_file, 2006)) | |
hist(inc2006, breaks = 30, col = "lightblue") | |
dev.off() | |
} | |
# filters: | |
# - people who are employed in a regular job (no self-employment) | |
# - this means COW (class of worker) is 4 for 2006, 1 for 1986 | |
# - employment income number must be available | |
# - people who are at least 15 years old | |
# - 2006: age group is at least 6 (15-17 year olds) and not 88 (unavailable) | |
# - 1986: just gives raw age | |
# - have a university degree | |
bac2006 <- data2006[which(data2006$cow == 4 & data2006$empin != 9999999 & data2006$empin != 8888888 | |
& data2006$agegrp >= 6 & data2006$agegrp != 88 & data2006$hdgree >= 9 & data2006$hdgree <= 13),] | |
bac1986 <- data1986[which(data1986$cowp == 1 & data1986$agep >= 15 & data1986$hlosp == 11),] | |
groups <- list( | |
list("education", 1, 1), | |
list("fine arts", 2, 2), | |
list("humanities", 3, 3), | |
list("social sciences", 4, 4), | |
list("commerce/business", 6, 5), | |
list("health/food sciences", 7, 6), | |
list("engineering", 8, 7), | |
list("sciences", 12, 10) | |
) | |
for (group in groups){ | |
group2006 <- bac2006[which(bac2006$mfs == group[3]),] | |
group1986 <- bac1986[which(bac1986$dgmfs == group[2]),] | |
calc_avg(group1986, group2006, sprintf("Average wages for degree in %s:", group[1]), "wages_uni%d.png") | |
print("") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment