Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
data2006 <- read.csv("census2006.csv", header = TRUE)
data1986 <- read.csv("census1986.csv", header = TRUE)
# Get the CPI for 1986, 2006, and 2010
cpi2006 <- 1.091
cpi1986 <- 0.656
cpi2010 <- 1.165
calc_avg <- function(w1986, w2006, message, png_file){
# scale by CPIs to give 2010 dollars
inc2006 <- w2006$empin / cpi2006 * cpi2010
inc1986 <- w1986$wagesp / cpi1986 * cpi2010
# get the mean and standard error to create a confidence interval for each
# income - at this sample size, probably won't have a big interval
mean2006 <- mean(inc2006)
med2006 <- median(inc2006)
se2006 <- sd(inc2006) / sqrt(length(inc2006))
semd2006 <- 1.253 * se2006 # standard error for median is 1.253 * standard error for mean
mean1986 <- mean(inc1986)
med1986 <- median(inc1986)
se1986 <- sd(inc1986) / sqrt(length(inc1986))
semd1986 <- 1.253 * se1986
z <- 1.96
print(message)
print(sprintf("Confidence for 1986: %f to %f", mean1986 - z * se1986, mean1986 + z * se1986))
print(sprintf("Confidence for 2006: %f to %f", mean2006 - z * se2006, mean2006 + z * se2006))
print(sprintf("Standard Deviation for 1986: %f", sd(inc1986)))
print(sprintf("Standard Deviation for 2006: %f", sd(inc2006)))
print(sprintf("Median Confidence for 1986: %f to %f", med1986 - z * semd1986, med1986 + z * se1986))
print(sprintf("Median Confidence for 2006: %f to %f", med2006 - z * semd2006, med2006 + z * se2006))
# output some histograms
png(sprintf(png_file, 1986))
hist(inc1986, breaks = 30, col = "lightblue")
dev.off()
png(sprintf(png_file, 2006))
hist(inc2006, breaks = 30, col = "lightblue")
dev.off()
}
# filters:
# - people who are employed in a regular job (no self-employment)
# - this means COW (class of worker) is 4 for 2006, 1 for 1986
# - employment income number must be available
# - people who are at least 15 years old
# - 2006: age group is at least 6 (15-17 year olds) and not 88 (unavailable)
# - 1986: just gives raw age
workers2006 <- data2006[which(data2006$cow == 4 & data2006$empin != 9999999 & data2006$empin != 8888888
& data2006$agegrp >= 6 & data2006$agegrp != 88),]
workers1986 <- data1986[which(data1986$cowp == 1 & data1986$agep >= 15),]
calc_avg(workers1986, workers2006, "Average wages for all employed individuals:", "wages%d.png")
# Now for university graduates - this is for people who have at least a bachelor's degree
# They can also have master's or doctorate.
bac2006 <- workers2006[which(workers2006$hdgree >= 9 & workers2006$hdgree <= 13),]
bac1986 <- workers1986[which(workers1986$hlosp == 11),]
calc_avg(bac1986, bac2006, "Average wages for bachelor's degree or higher:", "wages_uni%d.png")
# for fun, let's do it for non-university graduates as well
no_bac2006 <- workers2006[which(workers2006$hdgree >= 1 & workers2006$hdgree < 9),]
no_bac1986 <- workers1986[which(workers1986$hlosp >= 1 & workers1986$hlosp <= 10),]
calc_avg(no_bac1986, no_bac2006, "Average wages for no bachelor's or higher:", "wages_non_uni%d.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment