Skip to content

Instantly share code, notes, and snippets.

@eklanche
Created December 26, 2015 16:35
Show Gist options
  • Save eklanche/b8ecaffe1ac8d92392a1 to your computer and use it in GitHub Desktop.
Save eklanche/b8ecaffe1ac8d92392a1 to your computer and use it in GitHub Desktop.
#install required packages, but only those not already available
#(an unconditional install.packages() re-downloads everything on every run
#and fails when the machine is offline)
required_pkgs <- c('readr', 'plyr', 'dplyr', 'ggplot2', 'scales')
pkg_available <- vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
missing_pkgs <- required_pkgs[!pkg_available]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
#load packages
library(readr)
library(plyr)
library(dplyr)
library(ggplot2)
library(scales)
#point the session at the folder that holds the ipums extract
setwd('/users/emily/dropbox/hist90_01/intro/data')
#load the extract and prepare it in a single pipeline:
#  SEXF - SEX as a factor with codes 1/2 relabelled male/female
#  AGEL - last digit of AGE
#  rows with AGE of 80 or more are dropped
agesex <- read_csv('agesex.csv') %>%
  mutate(
    SEXF = revalue(factor(SEX), c('1'='male', '2'='female')),
    AGEL = AGE %% 10
  ) %>%
  filter(AGE < 80)
#weighted persons (sum of PERWT) per last-digit category, for each year and sex
ageheap <- agesex %>%
  group_by(YEAR, SEXF, AGEL) %>%
  summarise(NUMBER=sum(PERWT))
#weighted persons overall, for each year and sex
total <- agesex %>%
  group_by(YEAR, SEXF) %>%
  summarise(TOTAL=sum(PERWT))
#put each year-by-sex total alongside the matching last-digit counts
ageheap <- merge(ageheap, total, by=c('YEAR', 'SEXF'))
#single-year subsets used by the individual graphs
ah2000 <- ageheap %>% filter(YEAR==2000)
ah1850 <- ageheap %>% filter(YEAR==1850)
#build the age-heaping bar chart shared by all three graphs
#  data   - data frame with AGEL, NUMBER, TOTAL and the faceting columns
#  facets - facet_grid formula (rows ~ columns)
#  title  - graph title
#returns the ggplot object without drawing it
plot_age_heap <- function(data, facets, title) {
  ggplot(data, aes(x=factor(AGEL), y=NUMBER/TOTAL)) + #AGEL on x axis, share of persons on y axis
    geom_bar(stat='identity') + #bar chart
    #data labels: percentage to 2 decimals, placed just below the top of each bar (x is inherited)
    geom_text(aes(y=NUMBER/TOTAL-.02, label=paste0(round(NUMBER/TOTAL*100, digits=2), '%')), colour='white') +
    scale_y_continuous(labels=percent, breaks=c(.05, .1, .15)) + #scale for y axis
    facet_grid(facets) +
    xlab('Last digit of age') + ylab('Number') + #axis labels
    ggtitle(title) #graph title
}

#draw one age-heaping graph into a png file
#  file, height, width - passed to png()
#  data, facets, title - passed to plot_age_heap()
save_age_heap <- function(file, data, facets, title, height, width) {
  png(file, height=height, width=width)
  #close the device even if plotting fails, so no graphics device is left open
  on.exit(dev.off(), add=TRUE)
  #explicit print: ggplot objects are only drawn automatically at top level,
  #so without it the file would be empty when the script is run via source()
  print(plot_age_heap(data, facets, title))
  invisible(file)
}

#graph all years: facet vertically by YEAR; horizontally by SEXF
#(the original dropped the '+' before ggtitle(), so the title was never applied,
#and used the nested formula YEAR~.~SEXF rather than rows ~ columns)
save_age_heap('ageheap.png', ageheap, YEAR ~ SEXF,
              'Age Heaping, 1850-1880 and 1900-2000', height=4000, width=4000)
#graph 2000: facet vertically by sex
save_age_heap('heap2000.png', ah2000, SEXF ~ .,
              'Age Heaping, 2000', height=500, width=1000)
#graph 1850: facet vertically by sex
save_age_heap('heap1850.png', ah1850, SEXF ~ .,
              'Age Heaping, 1850', height=500, width=1000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment