Skip to content

Instantly share code, notes, and snippets.

@jm3885
Created November 16, 2016 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jm3885/fd818aa37ac8ec47eb4d628275c725a5 to your computer and use it in GitHub Desktop.
Save jm3885/fd818aa37ac8ec47eb4d628275c725a5 to your computer and use it in GitHub Desktop.
#FIGURE 1
#running packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
# Render `plot` into a PNG file and close the graphics device afterwards.
#   plot   : object to draw; it is handed to print().
#   file   : output path; defaults to the file name this figure has always used.
#   height : image height passed to png().
#   width  : image width passed to png().
# Returns the value of dev.off(), exactly as before.
printplot <- function(plot, file = 'Figure1.png', height = 500, width = 1000) {
  png(file, height = height, width = width)
  # Remember which device was just opened so that exactly this one is closed,
  # even if print() fails or other devices happen to be open as well.
  device <- dev.cur()
  on.exit(if (device %in% dev.list()) dev.off(device), add = TRUE)
  print(plot)
  dev.off(device)
}
# reading data downloaded from IPUMS
data1 <- read_csv('Data.csv')
# dropping Alaska and Hawaii (STATEFIP 2 and 15) for years before 1960
FirstGenData <- data1 %>%
  filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# filtering out year = 1870
FirstGenData <- FirstGenData %>% filter(YEAR != 1870)
# filtering out children (only rows with AGE > 18 are kept)
FirstGenData <- FirstGenData %>% filter(AGE > 18)
# Classifying first-generation immigrants by birthplace code (BPL).
# NOTE(review): every bound is strict, so the endpoint codes themselves
# (420, 429, 430, 499, 500, 524) end up in 'Neither' -- confirm against the
# IPUMS BPL codebook that this is intended.
# The column must keep the name Gen1: every later step refers to it. (An
# un-piped rename() call without a data argument followed this statement and
# stopped the script with an error; it is removed.)
FirstGenData1 <- FirstGenData %>%
  mutate(Gen1 = ifelse(420 < BPL & BPL < 429,
    'First Generation, Northern & Western European',
    ifelse(430 < BPL & BPL < 499,
      'First Generation, Southern & Eastern European',
      ifelse(500 < BPL & BPL < 524,
        'First Generation, Asian',
        'Neither'))))
# filtering out people who are "Neither"
FirstGenData2 <- FirstGenData1 %>% filter(Gen1 != 'Neither')
# Accounting for Sample Line Weight in 1940
FirstGenData3 <- FirstGenData2 %>%
  mutate(Weight = ifelse(YEAR == 1940, SLWT, PERWT))
# Weighted total per group and year (the denominator of the percentage)
FirstGenData4 <- FirstGenData3 %>%
  group_by(Gen1, YEAR) %>%
  summarise(Total = sum(Weight))
# Recoding literacy: LIT == 4 becomes 'Literate', any other value 'Illiterate'.
# Weight already exists in FirstGenData3, so it is not computed a second time.
FirstGenDataLit <- FirstGenData3 %>%
  mutate(LIT = ifelse(LIT == 4, 'Literate', 'Illiterate'))
# Weighted count per group, year and literacy status
FirstGenDataLit2 <- FirstGenDataLit %>%
  group_by(Gen1, YEAR, LIT) %>%
  summarise(NumberLiterate = sum(Weight))
# keeping only the literate counts (the numerator of the percentage)
FirstGenDataLit3 <- FirstGenDataLit2 %>% filter(LIT != 'Illiterate')
# Joining totals and literate counts
FirstGenDataFinal <- left_join(FirstGenData4, FirstGenDataLit3,
                               by = c('YEAR', 'Gen1'))
# Selecting variables
FirstGenDataFinal2 <- FirstGenDataFinal %>%
  select(YEAR, Gen1, NumberLiterate, Total)
# determining percent literate
datapercent <- FirstGenDataFinal2 %>%
  mutate(percent = NumberLiterate / Total * 100) %>%
  select(YEAR, Gen1, percent) %>%
  rename(Group = Gen1)
# graphing
# printplot() opens and closes its own PNG device, so no device is opened
# here; a separate png() call at this point would leave an extra device open
# that nothing draws to or closes.
# theme_classic() is a complete theme and fully determines the look of the
# plot. theme_set() must not be added to a plot: it returns the *previous*
# default theme and changes the session-wide default as a side effect. Pass
# base_size to theme_classic() if larger text is wanted.
graph1 <- ggplot(data = datapercent,
                 aes(x = YEAR, y = percent, group = Group, colour = Group)) +
  geom_line() +
  geom_point() +
  expand_limits(y = 0) +
  labs(title = 'Literacy of First Generation Immigrants, 1880-1930',
       x = 'Year', y = 'Percent Literate') +
  theme_classic()
printplot(graph1)
#FIGURE 2
#running packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
# reading data downloaded from IPUMS
data1 <- read_csv('Data.csv')
# Render `plot` into a PNG file and close the graphics device afterwards.
#   plot   : object to draw; it is handed to print().
#   file   : output path; defaults to the file name this figure has always used.
#   height : image height passed to png().
#   width  : image width passed to png().
# Returns the value of dev.off(), exactly as before.
printplot <- function(plot, file = 'Figure2.png', height = 500, width = 1000) {
  png(file, height = height, width = width)
  # Remember which device was just opened so that exactly this one is closed,
  # even if print() fails or other devices happen to be open as well.
  device <- dev.cur()
  on.exit(if (device %in% dev.list()) dev.off(device), add = TRUE)
  print(plot)
  dev.off(device)
}
# dropping Alaska and Hawaii (STATEFIP 2 and 15) for years before 1960
SecondGenData <- data1 %>%
  filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# filtering out year = 1870
SecondGenData <- SecondGenData %>% filter(YEAR != 1870)
# filtering out children (only rows with AGE > 18 are kept)
SecondGenData <- SecondGenData %>% filter(AGE > 18)
# Classifying the second generation: mother's (MBPL) or father's (FBPL)
# birthplace code lies in the foreign range AND the person's own birthplace
# code (BPL) lies strictly between 1 and 99.
# NOTE(review): every bound is strict, so the endpoint codes themselves
# (e.g. BPL 1 and 99, parental 420/429/430/499/500/524) end up in 'Neither'
# -- confirm against the IPUMS codebook that this is intended.
SecondGenData2 <- SecondGenData %>%
  mutate(Gen2 = ifelse(
    ((420 < MBPL & MBPL < 429) | (420 < FBPL & FBPL < 429)) &
      (1 < BPL & BPL < 99),
    'Second Generation Northern & Western European',
    ifelse(
      ((430 < MBPL & MBPL < 499) | (430 < FBPL & FBPL < 499)) &
        (1 < BPL & BPL < 99),
      'Second Generation Southern & Eastern European',
      ifelse(
        ((500 < MBPL & MBPL < 524) | (500 < FBPL & FBPL < 524)) &
          (1 < BPL & BPL < 99),
        'Second Generation Asian',
        'Neither'))))
# filtering out people who are "Neither"
SecondGenData3 <- SecondGenData2 %>% filter(Gen2 != 'Neither')
# Accounting for Sample Line Weight in 1940
SecondGenData4 <- SecondGenData3 %>%
  mutate(Weight = ifelse(YEAR == 1940, SLWT, PERWT))
# Weighted total per group and year (the denominator of the percentage)
SecondGenData5 <- SecondGenData4 %>%
  group_by(Gen2, YEAR) %>%
  summarise(Total = sum(Weight))
# Recoding literacy: LIT == 4 becomes 'Literate', any other value 'Illiterate'
SecondGenDataLit <- SecondGenData3 %>%
  mutate(LIT = ifelse(LIT == 4, 'Literate', 'Illiterate'))
# SecondGenDataLit starts from SecondGenData3, which has no Weight column yet
SecondGenDataLit2 <- SecondGenDataLit %>%
  mutate(Weight = ifelse(YEAR == 1940, SLWT, PERWT))
# Weighted count per group, year and literacy status
SecondGenDataLit3 <- SecondGenDataLit2 %>%
  group_by(Gen2, YEAR, LIT) %>%
  summarise(NumberLiterate = sum(Weight))
# keeping only the literate counts (the numerator of the percentage)
SecondGenDataLit4 <- SecondGenDataLit3 %>% filter(LIT != 'Illiterate')
# Joining totals and literate counts
SecondGenDataLitFinal <- left_join(SecondGenData5, SecondGenDataLit4,
                                   by = c('YEAR', 'Gen2'))
# Selecting variables
SecondGenFinal2 <- SecondGenDataLitFinal %>%
  select(YEAR, Gen2, NumberLiterate, Total)
# determining percent literate
datapercent2 <- SecondGenFinal2 %>%
  mutate(percent = NumberLiterate / Total * 100) %>%
  select(YEAR, Gen2, percent) %>%
  rename(Group = Gen2)
# graphing
# printplot() opens and closes its own PNG device, so no device is opened
# here; a separate png() call at this point would leave an extra device open
# that nothing draws to or closes.
# theme_classic() is a complete theme and fully determines the look of the
# plot. theme_set() must not be added to a plot: it returns the *previous*
# default theme and changes the session-wide default as a side effect. Pass
# base_size to theme_classic() if larger text is wanted.
graph1 <- ggplot(data = datapercent2,
                 aes(x = YEAR, y = percent, group = Group, colour = Group)) +
  geom_line() +
  geom_point() +
  expand_limits(y = 0) +
  labs(title = 'Literacy of Second Generation Immigrants, 1880-1930',
       x = 'Year', y = 'Percent Literate') +
  theme_classic()
printplot(graph1)
#FIGURE 3
#running packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
# Render `plot` into a PNG file and close the graphics device afterwards.
#   plot   : object to draw; it is handed to print().
#   file   : output path; defaults to the file name this figure has always used.
#   height : image height passed to png().
#   width  : image width passed to png().
# Returns the value of dev.off(), exactly as before.
printplot <- function(plot, file = 'Figure3.2.png', height = 500, width = 1000) {
  png(file, height = height, width = width)
  # Remember which device was just opened so that exactly this one is closed,
  # even if print() fails or other devices happen to be open as well.
  device <- dev.cur()
  on.exit(if (device %in% dev.list()) dev.off(device), add = TRUE)
  print(plot)
  dev.off(device)
}
# reading data downloaded from IPUMS
# (the file name has to be a quoted string; a bare Data.csv is looked up as
# an R object and fails)
data2 <- read_csv('Data.csv')
# dropping Alaska and Hawaii (STATEFIP 2 and 15) for years before 1960
data2 <- data2 %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# filtering out children (only rows with AGE > 18 are kept)
data2.1 <- data2 %>% filter(AGE > 18)
# filtering out year = 1870
data2.2 <- data2.1 %>% filter(YEAR != 1870)
# Classifying every person into one comparison group:
#   1 = birthplace code in one of the three immigrant ranges,
#   2-4 = RACE 1 / 2 / 3 with BPL < 120 (the native-born groups),
#   5 = everybody else.
# levels = 1:5 is given explicitly so that the five labels always line up with
# their codes, also when one of the codes does not occur in the data.
# NOTE(review): the immigrant ranges use strict bounds, so the endpoint codes
# (420, 429, 430, 499, 500, 524) are not counted as immigrants -- confirm
# against the IPUMS BPL codebook that this is intended.
data3 <- data2.2 %>%
  mutate(ImStatRace = factor(
    ifelse((420 < BPL & BPL < 429) | (430 < BPL & BPL < 499) |
             (500 < BPL & BPL < 524), 1,
      ifelse(RACE == 1 & BPL < 120, 2,
        ifelse(RACE == 2 & BPL < 120, 3,
          ifelse(RACE == 3 & BPL < 120, 4, 5)))),
    levels = 1:5,
    labels = c('Immigrant', 'Native-Born White', 'Native-Born Black',
               'Native American', 'Neither')))
#BPL %in% 421:428 | BPL %in% 431:498
# filtering out people who are "Neither"
data4 <- data3 %>% filter(ImStatRace != 'Neither')
# Accounting for Sample Line Weight in 1940
data5 <- data4 %>% mutate(Weight = ifelse(YEAR == 1940, SLWT, PERWT))
# Weighted total per group and year (the denominator of the percentage)
data6 <- data5 %>%
  group_by(ImStatRace, YEAR) %>%
  summarise(Total = sum(Weight))
# Recoding literacy (LIT == 4 becomes 'Literate') and keeping the literate rows
datalit <- data5 %>% mutate(LIT = ifelse(LIT == 4, 'Literate', 'Illiterate'))
datalit <- datalit %>% filter(LIT == 'Literate')
datalit2 <- datalit %>% mutate(Weight = ifelse(YEAR == 1940, SLWT, PERWT))
# Weighted literate count per year and group (the numerator)
datalit3 <- datalit2 %>%
  group_by(YEAR, ImStatRace, LIT) %>%
  summarise(NumberLiterate = sum(Weight))
# Joining totals and literate counts
DataFinal <- left_join(data6, datalit3, by = c('YEAR', 'ImStatRace'))
# Selecting variables
DataFinal2 <- DataFinal %>%
  select(YEAR, ImStatRace, NumberLiterate, Total)
# determining percent literate
datapercent3 <- DataFinal2 %>%
  mutate(percent = NumberLiterate / Total * 100) %>%
  select(YEAR, ImStatRace, percent) %>%
  rename(Group = ImStatRace)
# graphing
# printplot() opens and closes its own PNG device, so no device is opened
# here; a separate png() call at this point would leave an extra device open
# that nothing draws to or closes.
# theme_classic() is a complete theme and fully determines the look of the
# plot. theme_set() must not be added to a plot: it returns the *previous*
# default theme and changes the session-wide default as a side effect. Pass
# base_size to theme_classic() if larger text is wanted.
graph3 <- ggplot(data = datapercent3,
                 aes(x = YEAR, y = percent, group = Group, colour = Group)) +
  geom_line() +
  geom_point() +
  expand_limits(y = 0) +
  labs(title = 'Literacy of Immigrants versus Native-Born Americans, 1880-1930',
       x = 'Year', y = 'Percent Literate', colour = 'Immigrant Group') +
  theme_classic()
printplot(graph3)
#FIGURE 4
#running packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
# reading data downloaded from IPUMS
a <- read_csv('Figures462.csv')
# Age category labels: '0-9', '10-19', ..., '70-79', '80+'
agecats <- c('0-9', paste0(1:7, '0-', 1:7, 9), '80+')
# turning the SEX codes into a labelled factor
b <- a %>% mutate(Sex = factor(SEX, labels = c('Male', 'Female')))
# separate ages by decade: 0 for 0-9, 1 for 10-19, ..., 8 for 80 and older
c <- b %>% mutate(Age = ifelse(AGE >= 80, 8, floor(AGE / 10)))
# attach the agecats labels; explicit levels keep label i on decade i even if
# some decade does not occur in the data
d <- c %>% mutate(Age = factor(Age, levels = 0:8, labels = agecats))
# separating out first and second generation immigrants
# First generation: own birthplace code (BPL) in the foreign range.
# Second generation: mother's OR father's birthplace code in that range AND own
# BPL strictly between 1 and 99. The parentheses around the "mother or father"
# part matter: `&` binds tighter than `|`, so without them the BPL condition
# would apply to the father's birthplace only.
# NOTE(review): all bounds are strict, so the endpoint codes (420, 429, and
# BPL 1 / 99) are excluded -- confirm against the IPUMS codebook.
e <- d %>%
  mutate(Gen = ifelse(
    420 < BPL & BPL < 429,
    'First Generation',
    ifelse(
      ((420 < MBPL & MBPL < 429) | (420 < FBPL & FBPL < 429)) &
        (1 < BPL & BPL < 99),
      'Second Generation',
      'Neither')))
# exclude Hawaii and Alaska (STATEFIP 2 and 15) for years before 1960
f <- e %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# dropping 1980
f <- f %>% filter(YEAR != 1980)
# filter out 'Neither'
f <- f %>% filter(Gen != 'Neither')
# second-generation rows in 1940 and 1950 use the sample-line weight (SLWT);
# every other row uses PERWT
f1 <- f %>%
  mutate(Weight = ifelse((YEAR == 1940 | YEAR == 1950) &
                           Gen == 'Second Generation', SLWT, PERWT))
# Group by Age, Sex, Generation, and Year
g <- f1 %>%
  group_by(Gen, YEAR, Age, Sex) %>%
  summarise(Number = sum(Weight))
# flip male to other side of the graph
h <- g %>% mutate(Number = ifelse(Sex == 'Male', -1 * Number, Number))
# make population pyramid graph.
# set correct number of pixels
png('population_pyramid_4.png', height = 500, width = 2000)
# print() is called explicitly so the plot is also drawn when the script is
# run through source(), where top-level values are not auto-printed.
print(
  # set x and y axis
  ggplot(data = h, aes(x = Age, y = Number, fill = Sex)) +
    # setting the values of the bar in the graph
    geom_bar(data = h[h$Sex == 'Male', ], stat = 'identity') +
    geom_bar(data = h[h$Sex == 'Female', ], stat = 'identity') +
    # flipping the graph around so the y axis is horizontal
    coord_flip() +
    # one row of panels per generation, one column per year
    facet_grid(Gen ~ YEAR) +
    # View h to determine range, set range based on number of immigrants per year
    scale_y_continuous(breaks = c(-100000, -50000, 0, 50000, 100000),
                       labels = c('100', '50', '0', '50', '100')) +
    # labeling the graph
    labs(y = 'Population in Thousands',
         title = 'Population Pyramids for Northern and Western European Immigrants and their Children') +
    # setting the color palette of the graph
    # (every layer has to be joined with `+`; a line without it ends the plot
    # and the palette, legend and theme below would never be applied)
    scale_fill_brewer(palette = 'Set1', guide = guide_legend(reverse = TRUE)) +
    # setting "Sex" label above the legend
    guides(fill = guide_legend(title = 'Sex', title.position = 'top')) +
    # setting background colors to black and white
    theme_bw() + theme(legend.position = 'bottom')
)
dev.off()
#FIGURE 5
#running packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
# reading data downloaded from IPUMS
a2 <- read_csv('Figures462.csv')
# Age category labels: '0-9', '10-19', ..., '70-79', '80+'
agecats <- c('0-9', paste0(1:7, '0-', 1:7, 9), '80+')
# turning the SEX codes into a labelled factor
b2 <- a2 %>% mutate(Sex = factor(SEX, labels = c('Male', 'Female')))
# separate ages by decade: 0 for 0-9, 1 for 10-19, ..., 8 for 80 and older
c2 <- b2 %>% mutate(Age = ifelse(AGE >= 80, 8, floor(AGE / 10)))
# attach the agecats labels; explicit levels keep label i on decade i even if
# some decade does not occur in the data
d2 <- c2 %>% mutate(Age = factor(Age, levels = 0:8, labels = agecats))
# separating out first and second generation immigrants
# First generation: own birthplace code (BPL) in the foreign range.
# Second generation: mother's OR father's birthplace code in that range AND own
# BPL strictly between 1 and 99. The parentheses around the "mother or father"
# part matter: `&` binds tighter than `|`, so without them the BPL condition
# would apply to the father's birthplace only.
# NOTE(review): all bounds are strict, so the endpoint codes (430, 499, and
# BPL 1 / 99) are excluded -- confirm against the IPUMS codebook.
e2 <- d2 %>%
  mutate(Gen = ifelse(
    430 < BPL & BPL < 499,
    'First Generation',
    ifelse(
      ((430 < MBPL & MBPL < 499) | (430 < FBPL & FBPL < 499)) &
        (1 < BPL & BPL < 99),
      'Second Generation',
      'Neither')))
# exclude Hawaii and Alaska (STATEFIP 2 and 15) for years before 1960
f2 <- e2 %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# dropping 1980
f2 <- f2 %>% filter(YEAR != 1980)
# filter out 'neither'
f2 <- f2 %>% filter(Gen != 'Neither')
# second-generation rows in 1940 and 1950 use the sample-line weight (SLWT);
# every other row uses PERWT. The year test is parenthesised because `&` binds
# tighter than `|`: without the parentheses every 1940 row, first generation
# included, would receive SLWT.
f2 <- f2 %>%
  mutate(Weight = ifelse((YEAR == 1940 | YEAR == 1950) &
                           Gen == 'Second Generation', SLWT, PERWT))
# Group by Age, Sex, Generation, and Year
g2 <- f2 %>%
  group_by(Gen, YEAR, Age, Sex) %>%
  summarise(Number = sum(Weight))
# flip male to other side of the graph
h2 <- g2 %>% mutate(Number = ifelse(Sex == 'Male', -1 * Number, Number))
# make population pyramid graph.
# set correct number of pixels
png('population_pyramid_final2.png', height = 500, width = 2000)
# print() is called explicitly so the plot is also drawn when the script is
# run through source(), where top-level values are not auto-printed.
print(
  # set x and y axis
  ggplot(data = h2, aes(x = Age, y = Number, fill = Sex)) +
    # setting the values of the bar in the graph
    geom_bar(data = h2[h2$Sex == 'Male', ], stat = 'identity') +
    geom_bar(data = h2[h2$Sex == 'Female', ], stat = 'identity') +
    # flipping the graph around so the y axis is horizontal
    coord_flip() +
    # one row of panels per generation, one column per year
    facet_grid(Gen ~ YEAR) +
    # View h2 to determine range, set range based on number of immigrants per year
    scale_y_continuous(breaks = c(-2000000, -1000000, 0, 1000000, 2000000),
                       labels = c('2', '1', '0', '1', '2')) +
    # labeling the graph
    labs(y = 'Population in Millions',
         title = 'Population Pyramids for Southern and Eastern European Immigrants and their Children') +
    # setting the color palette of the graph
    scale_fill_brewer(palette = 'Set1', guide = guide_legend(reverse = TRUE)) +
    # setting "Sex" label above the legend
    guides(fill = guide_legend(title = 'Sex', title.position = 'top')) +
    # setting background colors to black and white
    theme_bw() + theme(legend.position = 'bottom')
)
dev.off()
#FIGURE 6
#running packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
# reading data downloaded from IPUMS
a3 <- read_csv('Figures462.csv')
# Age category labels: '0-9', '10-19', ..., '70-79', '80+'
agecats <- c('0-9', paste0(1:7, '0-', 1:7, 9), '80+')
# turning the SEX codes into a labelled factor
b3 <- a3 %>% mutate(Sex = factor(SEX, labels = c('Male', 'Female')))
# separate ages by decade: 0 for 0-9, 1 for 10-19, ..., 8 for 80 and older
c3 <- b3 %>% mutate(Age = ifelse(AGE >= 80, 8, floor(AGE / 10)))
# attach the agecats labels; explicit levels keep label i on decade i even if
# some decade does not occur in the data
d3 <- c3 %>% mutate(Age = factor(Age, levels = 0:8, labels = agecats))
# separating out first and second generation immigrants
# First generation: own birthplace code (BPL) in the foreign range.
# Second generation: mother's OR father's birthplace code in that range AND own
# BPL strictly between 1 and 99. The parentheses around the "mother or father"
# part matter: `&` binds tighter than `|`, so without them the BPL condition
# would apply to the father's birthplace only.
# NOTE(review): all bounds are strict, so the endpoint codes (500, 524, and
# BPL 1 / 99) are excluded -- confirm against the IPUMS codebook.
e3 <- d3 %>%
  mutate(Gen = ifelse(
    500 < BPL & BPL < 524,
    'First Generation Asian',
    ifelse(
      ((500 < MBPL & MBPL < 524) | (500 < FBPL & FBPL < 524)) &
        (1 < BPL & BPL < 99),
      'Second Generation Asian',
      'Neither')))
# exclude Hawaii and Alaska (STATEFIP 2 and 15) for years before 1960
f3 <- e3 %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# dropping 1980
f3 <- f3 %>% filter(YEAR != 1980)
# filter out 'Neither'
g3 <- f3 %>% filter(Gen != 'Neither')
# second-generation rows in 1940 and 1950 use the sample-line weight (SLWT);
# every other row uses PERWT
f4 <- g3 %>%
  mutate(Weight = ifelse((YEAR == 1940 | YEAR == 1950) &
                           Gen == 'Second Generation Asian', SLWT, PERWT))
# Group by Age, Sex, Generation, and Year
h3 <- f4 %>%
  group_by(Gen, YEAR, Age, Sex) %>%
  summarise(Number = sum(Weight))
# flip male to other side of the graph
h5 <- h3 %>% mutate(Number = ifelse(Sex == 'Male', -1 * Number, Number))
# make population pyramid graph.
# set correct number of pixels
png('population_pyramid_final4.png', height = 500, width = 2000)
# print() is called explicitly so the plot is also drawn when the script is
# run through source(), where top-level values are not auto-printed.
print(
  # set x and y axis
  ggplot(data = h5, aes(x = Age, y = Number, fill = Sex)) +
    # setting the values of the bar in the graph
    geom_bar(data = h5[h5$Sex == 'Male', ], stat = 'identity') +
    geom_bar(data = h5[h5$Sex == 'Female', ], stat = 'identity') +
    # flipping the graph around so the y axis is horizontal
    coord_flip() +
    # one row of panels per generation, one column per year
    facet_grid(Gen ~ YEAR) +
    # View h5 to determine range, set range based on number of immigrants per year
    scale_y_continuous(breaks = c(-30000, -20000, -10000, 0, 10000, 20000, 30000),
                       labels = c('30', '20', '10', '0', '10', '20', '30')) +
    # labeling the graph
    labs(y = 'Population in Thousands',
         title = 'Population Pyramids for Asian Immigrants and their Children') +
    # setting the color palette of the graph
    scale_fill_brewer(palette = 'Set1', guide = guide_legend(reverse = TRUE)) +
    # setting "Sex" label above the legend
    guides(fill = guide_legend(title = 'Sex', title.position = 'top')) +
    # setting background colors to black and white
    theme_bw() + theme(legend.position = 'bottom')
)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment