Skip to content

Instantly share code, notes, and snippets.

@SirSamAlot280
Created October 9, 2016 01:24
Show Gist options
  • Save SirSamAlot280/31e363f7a87dcda6fb2232e195dad692 to your computer and use it in GitHub Desktop.
Save SirSamAlot280/31e363f7a87dcda6fb2232e195dad692 to your computer and use it in GitHub Desktop.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# Build the table behind the population pyramids: weighted head counts of
# first- and second-generation immigrants (birthplace codes 450-457), broken
# down by ten-year age band, sex, generation and census year.

# Read the selected IPUMS extract. Alaska (STATEFIP 2) and Hawaii
# (STATEFIP 15) are kept only from 1960 onwards.
census <- read_csv("./data/10_6.csv") %>%
  filter(YEAR >= 1960 | !STATEFIP %in% c(2, 15))

# Labels for the age bands 0..8 (band = decade of age, with 80+ collapsed
# into the last band).
age_labels <- c(
  "0-9", "10-19", "20-29", "30-39", "40-49",
  "50-59", "60-69", "70-79", "80+"
)

immigrants <- census %>%
  mutate(
    # First Generation: the person was born in the 450-457 birthplace range.
    # Second Generation: not first generation, but the mother (MBPL) or the
    # father (FBPL) was born in that range. Everyone else is "Neither".
    Gen = ifelse(
      BPL >= 450 & BPL <= 457,
      "First Generation",
      ifelse(
        (MBPL >= 450 & MBPL <= 457) | (FBPL >= 450 & FBPL <= 457),
        "Second Generation",
        "Neither"
      )
    ),
    # Levels are spelled out so the code -> label mapping does not depend on
    # which codes happen to occur in the extract (code 1 -> Male,
    # code 2 -> Female, matching the sorted order the labels were written for).
    Sex = factor(SEX, levels = c(1, 2), labels = c("Male", "Female")),
    # Decade of age, capped at band 8 so that everyone aged 80+ shares a band.
    # Explicit levels keep this from failing when an age band is absent.
    Age = factor(pmin(floor(AGE / 10), 8), levels = 0:8, labels = age_labels),
    # Use SLWT for Second Generation records in 1940 and PERWT for every
    # other record.
    Weight = ifelse(YEAR == 1940 & Gen == "Second Generation", SLWT, PERWT)
  ) %>%
  # Drop everyone who is neither a first- nor a second-generation immigrant.
  filter(Gen != "Neither")

# Weighted population per age band, sex, generation and year. Male counts are
# negated so that they extend to the left of zero in the pyramid. The result
# keeps the name `i` because the plotting code below reads it.
i <- immigrants %>%
  group_by(Age, Sex, Gen, YEAR) %>%
  summarise(Number = sum(Weight)) %>%
  ungroup() %>%
  mutate(Number = ifelse(Sex == "Male", -Number, Number))
# Draw the population pyramids from `i`: one panel per generation (rows) and
# census year (columns), age bands on the vertical axis, males to the left of
# zero and females to the right.
#
# The plot is built first and then printed explicitly between png() and
# dev.off(). A bare ggplot expression is only drawn by top-level
# auto-printing, which does not happen under source(), so relying on it can
# leave the PNG empty.
pyramid_plot <- ggplot(data = i, aes(x = Age, y = Number, fill = Sex)) +
  # Each sex is drawn as its own layer of bars.
  geom_bar(data = i[i$Sex == "Male", ], stat = "identity") +
  geom_bar(data = i[i$Sex == "Female", ], stat = "identity") +
  coord_flip() +
  facet_grid(Gen ~ YEAR) +
  # Male counts are stored as negative numbers; label both sides with
  # positive values expressed in millions.
  scale_y_continuous(
    breaks = c(-1000000, -500000, 0, 500000, 1000000),
    labels = c("1", "0.5", "0", "0.5", "1")
  ) +
  labs(
    y = "Population in Millions",
    title = "Population Pyramids for Immigrants and their Children from Central/Eastern Europe"
  ) +
  scale_fill_discrete(guide = guide_legend(reverse = TRUE)) +
  theme_bw() +
  theme(legend.position = "bottom")

png("Population_Pyramids_1.png", height = 500, width = 2000)
print(pyramid_plot)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment