Created
November 16, 2016 14:17
-
-
Save jm3885/fd818aa37ac8ec47eb4d628275c725a5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#FIGURE 1 | |
#running packages | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
# Render `plot` to a PNG file and always close the graphics device afterwards.
#
# Args:
#   plot:   object to draw; it is passed to print() while the device is open.
#   file:   output path; defaults to the file name this section always wrote.
#   height: image height in pixels.
#   width:  image width in pixels.
#
# Returns `plot` invisibly.
printplot <- function(plot, file = 'Figure1.png', height = 500, width = 1000) {
  png(file, height = height, width = width)
  # Close the device even when print() fails, so a failed render cannot leave
  # a stray device open for later figures to draw into.
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(plot)
}
# ---- Figure 1: data preparation ----
# Produces `datapercent` (YEAR, Group, percent): for each year and
# first-generation group, the weighted share of adults coded literate.

# Read the extract (described by the author as downloaded from IPUMS).
data1 <- read_csv('Data.csv')

# Before 1960, drop STATEFIP 2 and 15 (Alaska and Hawaii per the author);
# from 1960 onward every state is kept.
FirstGenData <- data1 %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# Drop the 1870 sample.
FirstGenData <- FirstGenData %>% filter(YEAR != 1870)
# Keep adults only (AGE strictly greater than 18).
FirstGenData <- FirstGenData %>% filter(AGE > 18)

# Label each person by birthplace (BPL) code range; everything else is
# 'Neither'.
# NOTE(review): both bounds are strict, so codes 420, 429, 430, 499, 500 and
# 524 all fall into 'Neither' - confirm against the BPL codebook.
# A stray, un-piped `rename(Gen1='First Generation Immigrants')` used to follow
# this statement. It had no data argument, so it stopped the script, and every
# later step refers to the column as Gen1, so it has been removed.
FirstGenData1 <- FirstGenData %>%
  mutate(Gen1 = ifelse(420 < BPL & BPL < 429,
                       'First Generation, Northern & Western European',
                ifelse(430 < BPL & BPL < 499,
                       'First Generation, Southern & Eastern European',
                ifelse(500 < BPL & BPL < 524,
                       'First Generation, Asian',
                       'Neither'))))

# Drop everyone who is not in one of the three groups.
FirstGenData2 <- FirstGenData1 %>% filter(Gen1 != 'Neither')

# Use the sample-line weight (SLWT) for 1940 and the person weight (PERWT)
# for every other year.
FirstGenData3 <- FirstGenData2 %>%
  mutate(Weight = ifelse(YEAR == 1940, SLWT, PERWT))

# Denominator: weighted population per group and year.
FirstGenData4 <- FirstGenData3 %>%
  group_by(Gen1, YEAR) %>%
  summarise(Total = sum(Weight))

# Numerator: weighted literate population (LIT code 4) per group and year.
# Weight is already present on FirstGenData3, so it is not recomputed here.
FirstGenDataLit <- FirstGenData3 %>%
  mutate(LIT = ifelse(LIT == 4, 'Literate', 'Illiterate'))
FirstGenDataLit2 <- FirstGenDataLit %>%
  group_by(Gen1, YEAR, LIT) %>%
  summarise(NumberLiterate = sum(Weight))
# Keep only the literate rows.
FirstGenDataLit3 <- FirstGenDataLit2 %>% filter(LIT != 'Illiterate')

# Attach the literate count to the totals. A group/year with no literate rows
# keeps NumberLiterate = NA, so its percentage is NA rather than 0.
FirstGenDataFinal <- left_join(FirstGenData4, FirstGenDataLit3,
                               by = c('YEAR', 'Gen1'))
FirstGenDataFinal2 <- FirstGenDataFinal %>%
  select(YEAR, Gen1, NumberLiterate, Total)

# Percent literate, with the grouping column renamed for the plot legend.
datapercent <- FirstGenDataFinal2 %>%
  mutate(percent = NumberLiterate / Total * 100) %>%
  select(YEAR, Gen1, percent) %>%
  rename(Group = Gen1)
# ---- Figure 1: plot ----
# Line chart of percent literate by year, one line per immigrant group.
#
# Two statements from the earlier version are intentionally gone:
#  * png('Figure1.3.png', ...) opened a device that was never closed;
#    printplot() opens and closes its own device for the real output.
#  * theme_set(theme_gray(base_size = 30)) was added into the `+` chain.
#    theme_set() returns the PREVIOUS global theme, and theme_classic() then
#    replaces the whole theme, so it never changed this figure; it only
#    altered the global default as a side effect. To enlarge the text, pass
#    base_size to theme_classic() instead.
graph1 <- ggplot(data = datapercent,
                 aes(x = YEAR, y = percent, group = Group, colour = Group)) +
  geom_line() +
  geom_point() +
  # Force the y axis to start at zero.
  expand_limits(y = 0) +
  labs(title = 'Literacy of First Generation Immigrants, 1880-1930',
       x = 'Year', y = 'Percent Literate') +
  theme_classic()
printplot(graph1)
#FIGURE 2 | |
#running packages | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
# Load the extract used for Figure 2 (described by the author as an IPUMS
# download).
data1 <- read_csv(file = 'Data.csv')
# Render `plot` to a PNG file and always close the graphics device afterwards.
#
# Args:
#   plot:   object to draw; it is passed to print() while the device is open.
#   file:   output path; defaults to the file name this section always wrote.
#   height: image height in pixels.
#   width:  image width in pixels.
#
# Returns `plot` invisibly.
printplot <- function(plot, file = 'Figure2.png', height = 500, width = 1000) {
  png(file, height = height, width = width)
  # Close the device even when print() fails, so a failed render cannot leave
  # a stray device open for later figures to draw into.
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(plot)
}
# ---- Figure 2: data preparation ----
# Produces `datapercent2` (YEAR, Group, percent): for each year and
# second-generation group, the weighted share of adults coded literate.

# TRUE where `x` lies strictly between `lo` and `hi` (both bounds excluded).
.strictly_between <- function(x, lo, hi) lo < x & x < hi

# Row filters, applied together:
#  * before 1960 drop STATEFIP 2 and 15 (Alaska and Hawaii per the author),
#  * drop the 1870 sample,
#  * keep adults only (AGE strictly greater than 18).
SecondGenData <- data1 %>%
  filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)),
         YEAR != 1870,
         AGE > 18)

# Second generation = own birthplace code (BPL) strictly between 1 and 99 and
# at least one parent (MBPL / FBPL) born in the group's code range.
SecondGenData2 <- SecondGenData %>%
  mutate(Gen2 = ifelse(
    (.strictly_between(MBPL, 420, 429) | .strictly_between(FBPL, 420, 429)) &
      .strictly_between(BPL, 1, 99),
    'Second Generation Northern & Western European',
    ifelse(
      (.strictly_between(MBPL, 430, 499) | .strictly_between(FBPL, 430, 499)) &
        .strictly_between(BPL, 1, 99),
      'Second Generation Southern & Eastern European',
      ifelse(
        (.strictly_between(MBPL, 500, 524) | .strictly_between(FBPL, 500, 524)) &
          .strictly_between(BPL, 1, 99),
        'Second Generation Asian',
        'Neither'))))

# Discard everyone outside the three groups.
SecondGenData3 <- filter(SecondGenData2, Gen2 != 'Neither')

# 1940 uses the sample-line weight (SLWT); all other years use PERWT.
SecondGenData4 <- mutate(SecondGenData3,
                         Weight = ifelse(YEAR == 1940, SLWT, PERWT))

# Denominator: weighted population per group and year.
SecondGenData5 <- SecondGenData4 %>%
  group_by(Gen2, YEAR) %>%
  summarise(Total = sum(Weight))

# Numerator: weighted literate population (LIT code 4) per group and year.
SecondGenDataLit <- mutate(SecondGenData3,
                           LIT = ifelse(LIT == 4, 'Literate', 'Illiterate'))
SecondGenDataLit2 <- mutate(SecondGenDataLit,
                            Weight = ifelse(YEAR == 1940, SLWT, PERWT))
SecondGenDataLit3 <- SecondGenDataLit2 %>%
  group_by(Gen2, YEAR, LIT) %>%
  summarise(NumberLiterate = sum(Weight))
# Only the literate rows are needed for the numerator.
SecondGenDataLit4 <- filter(SecondGenDataLit3, LIT != 'Illiterate')

# Totals joined with literate counts, trimmed to the columns used below.
SecondGenDataLitFinal <- left_join(SecondGenData5, SecondGenDataLit4,
                                   by = c('YEAR', 'Gen2'))
SecondGenFinal2 <- select(SecondGenDataLitFinal,
                          YEAR, Gen2, NumberLiterate, Total)

# Percent literate; the group column is renamed for the plot legend.
datapercent2 <- SecondGenFinal2 %>%
  mutate(percent = NumberLiterate / Total * 100) %>%
  select(YEAR, Gen2, percent) %>%
  rename(Group = Gen2)
# ---- Figure 2: plot ----
# Line chart of percent literate by year, one line per second-generation
# group.
#
# Two statements from the earlier version are intentionally gone:
#  * png('Figure1.4.png', ...) opened a device that was never closed;
#    printplot() opens and closes its own device for the real output.
#  * theme_set(theme_gray(base_size = 30)) was added into the `+` chain.
#    theme_set() returns the PREVIOUS global theme, and theme_classic() then
#    replaces the whole theme, so it never changed this figure; it only
#    altered the global default as a side effect. To enlarge the text, pass
#    base_size to theme_classic() instead.
graph1 <- ggplot(data = datapercent2,
                 aes(x = YEAR, y = percent, group = Group, colour = Group)) +
  geom_line() +
  geom_point() +
  # Force the y axis to start at zero.
  expand_limits(y = 0) +
  labs(title = 'Literacy of Second Generation Immigrants, 1880-1930',
       x = 'Year', y = 'Percent Literate') +
  theme_classic()
printplot(graph1)
#FIGURE 3 | |
#running packages | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
# Render `plot` to a PNG file and always close the graphics device afterwards.
#
# Args:
#   plot:   object to draw; it is passed to print() while the device is open.
#   file:   output path; defaults to the file name this section always wrote.
#   height: image height in pixels.
#   width:  image width in pixels.
#
# Returns `plot` invisibly.
printplot <- function(plot, file = 'Figure3.2.png', height = 500, width = 1000) {
  png(file, height = height, width = width)
  # Close the device even when print() fails, so a failed render cannot leave
  # a stray device open for later figures to draw into.
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(plot)
}
# ---- Figure 3: data preparation ----
# Produces `datapercent3` (YEAR, Group, percent): weighted share of adults
# coded literate, comparing immigrants with native-born groups.

# Read the extract (described by the author as downloaded from IPUMS).
# The file name must be a quoted string: the bare Data.csv used previously is
# looked up as an R object and fails with "object 'Data.csv' not found".
data2 <- read_csv('Data.csv')

# Before 1960, drop STATEFIP 2 and 15 (Alaska and Hawaii per the author).
data2 <- data2 %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# Keep adults only (AGE strictly greater than 18).
data2.1 <- data2 %>% filter(AGE > 18)
# Drop the 1870 sample.
data2.2 <- data2.1 %>% filter(YEAR != 1870)

# Classify each person:
#   1 = birthplace code (BPL) in one of the three immigrant ranges,
#   2-4 = RACE 1/2/3 with BPL < 120 (labelled native-born below),
#   5 = everyone else.
# `levels = 1:5` pins the code-to-label mapping; without it factor() fails
# whenever one of the five categories is absent from the data, because the
# number of labels no longer matches the number of observed levels.
# Alternative range test left by the author: BPL %in% 421:428 | BPL %in% 431:498
data3 <- data2.2 %>%
  mutate(ImStatRace = factor(
    ifelse((420 < BPL & BPL < 429) | (430 < BPL & BPL < 499) |
             (500 < BPL & BPL < 524), 1,
    ifelse(RACE == 1 & BPL < 120, 2,
    ifelse(RACE == 2 & BPL < 120, 3,
    ifelse(RACE == 3 & BPL < 120, 4, 5)))),
    levels = 1:5,
    labels = c('Immigrant', 'Native-Born White', 'Native-Born Black',
               'Native American', 'Neither')))

# Drop people who fit none of the four groups.
data4 <- data3 %>% filter(ImStatRace != 'Neither')

# Use the sample-line weight (SLWT) for 1940 and PERWT for every other year.
data5 <- data4 %>% mutate(Weight = ifelse(YEAR == 1940, SLWT, PERWT))

# Denominator: weighted population per group and year.
data6 <- data5 %>%
  group_by(ImStatRace, YEAR) %>%
  summarise(Total = sum(Weight))

# Numerator: weighted literate population (LIT code 4) per group and year.
datalit <- data5 %>% mutate(LIT = ifelse(LIT == 4, 'Literate', 'Illiterate'))
datalit <- datalit %>% filter(LIT == 'Literate')
# Weight is already present from data5, so datalit2 carries it through as is.
datalit2 <- datalit
datalit3 <- datalit2 %>%
  group_by(YEAR, ImStatRace, LIT) %>%
  summarise(NumberLiterate = sum(Weight))

# Attach the literate count to the totals. A group/year with no literate rows
# keeps NumberLiterate = NA, so its percentage is NA rather than 0.
DataFinal <- left_join(data6, datalit3, by = c('YEAR', 'ImStatRace'))
DataFinal2 <- DataFinal %>%
  select(YEAR, ImStatRace, NumberLiterate, Total)

# Percent literate, with the grouping column renamed for the plot legend.
datapercent3 <- DataFinal2 %>%
  mutate(percent = NumberLiterate / Total * 100) %>%
  select(YEAR, ImStatRace, percent) %>%
  rename(Group = ImStatRace)
# ---- Figure 3: plot ----
# Line chart of percent literate by year: immigrants versus native-born
# groups.
#
# Two statements from the earlier version are intentionally gone:
#  * png('Figure3.png', ...) opened a device that was never closed;
#    printplot() opens and closes its own device for the real output.
#  * theme_set(theme_gray(base_size = 30)) was added into the `+` chain.
#    theme_set() returns the PREVIOUS global theme, and theme_classic() then
#    replaces the whole theme, so it never changed this figure; it only
#    altered the global default as a side effect. To enlarge the text, pass
#    base_size to theme_classic() instead.
graph3 <- ggplot(data = datapercent3,
                 aes(x = YEAR, y = percent, group = Group, colour = Group)) +
  geom_line() +
  geom_point() +
  # Force the y axis to start at zero.
  expand_limits(y = 0) +
  labs(title = 'Literacy of Immigrants versus Native-Born Americans, 1880-1930',
       x = 'Year', y = 'Percent Literate', colour = 'Immigrant Group') +
  theme_classic()
printplot(graph3)
#FIGURE 4 | |
#running packages | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
# ---- Figure 4: data preparation ----
# Produces `h`: weighted head counts by generation, year, 10-year age band
# and sex, with male counts negated so they plot to the left of zero.

# Read the extract (described by the author as downloaded from IPUMS).
a <- read_csv('Figures462.csv')

# Age-band labels '0-9', '10-19', ..., '70-79', '80+', built in one
# vectorised call instead of growing a vector inside a loop.
agecats <- c('0-9', paste0(1:7, '0-', 1:7, '9'), '80+')

# Label SEX codes 1/2. Explicit levels pin the code-to-label mapping even if
# one code is absent; any other code becomes NA instead of failing.
b <- a %>% mutate(Sex = factor(SEX, levels = c(1, 2),
                               labels = c('Male', 'Female')))
# Band index 0-8: decade of age, with everything from 80 upward in band 8.
c <- b %>% mutate(Age = pmin(floor(AGE / 10), 8))
# Attach the labels; explicit levels keep band i mapped to agecats[i + 1]
# even when a band has no observations.
d <- c %>% mutate(Age = factor(Age, levels = 0:8, labels = agecats))

# First generation: own birthplace code (BPL) strictly between 420 and 429.
# Second generation: at least one parent (MBPL / FBPL) in that range AND own
# BPL strictly between 1 and 99.
# `&` binds tighter than `|`, so the two parent tests are wrapped in their
# own parentheses. Without them the BPL restriction applied only to the
# father's birthplace, and anyone with a mother in range counted as second
# generation wherever they were born.
e <- d %>%
  mutate(Gen = ifelse(420 < BPL & BPL < 429, 'First Generation',
               ifelse(((420 < MBPL & MBPL < 429) | (420 < FBPL & FBPL < 429)) &
                        (1 < BPL & BPL < 99),
                      'Second Generation',
                      'Neither')))

# Before 1960, drop STATEFIP 2 and 15 (Alaska and Hawaii per the author).
f <- e %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# Drop the 1980 sample.
f <- f %>% filter(YEAR != 1980)
# Drop everyone who is in neither generation.
f <- f %>% filter(Gen != 'Neither')
# Second-generation rows in 1940 and 1950 use the sample-line weight (SLWT);
# all other rows use PERWT.
f1 <- f %>%
  mutate(Weight = ifelse((YEAR == 1940 | YEAR == 1950) &
                           Gen == 'Second Generation', SLWT, PERWT))

# Weighted count per generation, year, age band and sex.
g <- f1 %>%
  group_by(Gen, YEAR, Age, Sex) %>%
  summarise(Number = sum(Weight))
# Negate male counts so they extend to the opposite side of the axis.
h <- g %>% mutate(Number = ifelse(Sex == 'Male', -1 * Number, Number))
# ---- Figure 4: population pyramid ----
# The plot is built first and the device opened afterwards, so an error while
# constructing the plot cannot leave a PNG device open.
#
# The earlier version had a stray `labs()` with no trailing `+` after the
# title. That ended the chain early: the palette, legend title, theme_bw()
# and legend position were never applied, and the leftover
# `scale_fill_brewer(...) + guides(...)` expression failed because ggplot2
# refuses to add ggproto objects to each other.
pyramid4 <- ggplot(data = h, aes(x = Age, y = Number, fill = Sex)) +
  # One bar layer per sex; male counts are already negative in `h`.
  geom_bar(data = h[h$Sex == 'Male', ], stat = 'identity') +
  geom_bar(data = h[h$Sex == 'Female', ], stat = 'identity') +
  # Flip so age bands run vertically and counts horizontally.
  coord_flip() +
  # Facet by generation and year.
  # NOTE(review): this formula looks equivalent to `Gen ~ YEAR` - confirm
  # before simplifying.
  facet_grid(Gen~.~YEAR) +
  # Tick labels show absolute values, in thousands, on both sides of zero.
  scale_y_continuous(breaks = c(-100000, -50000, 0, 50000, 100000),
                     labels = c('100', '50', '0', '50', '100')) +
  labs(y = 'Population in Thousands',
       title = 'Population Pyramids for Northern and Western European Immigrants and their Children') +
  scale_fill_brewer(palette = 'Set1', guide = guide_legend(reverse = TRUE)) +
  # Put the "Sex" title above the legend keys.
  guides(fill = guide_legend(title = 'Sex', title.position = 'top')) +
  theme_bw() + theme(legend.position = 'bottom')

png('population_pyramid_4.png', height = 500, width = 2000)
# Explicit print(): a bare top-level ggplot object is only drawn by R's
# autoprint, which does not happen when the script is run through source().
print(pyramid4)
dev.off()
#FIGURE 5 | |
#running packages | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
# ---- Figure 5: data preparation ----
# Produces `h2`: weighted head counts by generation, year, 10-year age band
# and sex, with male counts negated so they plot to the left of zero.

# Read the extract (described by the author as downloaded from IPUMS).
a2 <- read_csv('Figures462.csv')

# Age-band labels '0-9', '10-19', ..., '70-79', '80+', built in one
# vectorised call instead of growing a vector inside a loop.
agecats <- c('0-9', paste0(1:7, '0-', 1:7, '9'), '80+')

# Label SEX codes 1/2. Explicit levels pin the code-to-label mapping even if
# one code is absent; any other code becomes NA instead of failing.
b2 <- a2 %>% mutate(Sex = factor(SEX, levels = c(1, 2),
                                 labels = c('Male', 'Female')))
# Band index 0-8: decade of age, with everything from 80 upward in band 8.
c2 <- b2 %>% mutate(Age = pmin(floor(AGE / 10), 8))
# Attach the labels; explicit levels keep band i mapped to agecats[i + 1]
# even when a band has no observations.
d2 <- c2 %>% mutate(Age = factor(Age, levels = 0:8, labels = agecats))

# First generation: own birthplace code (BPL) strictly between 430 and 499.
# Second generation: at least one parent (MBPL / FBPL) in that range AND own
# BPL strictly between 1 and 99.
# `&` binds tighter than `|`, so the two parent tests are wrapped in their
# own parentheses. Without them the BPL restriction applied only to the
# father's birthplace.
e2 <- d2 %>%
  mutate(Gen = ifelse(430 < BPL & BPL < 499, 'First Generation',
               ifelse(((430 < MBPL & MBPL < 499) | (430 < FBPL & FBPL < 499)) &
                        (1 < BPL & BPL < 99),
                      'Second Generation',
                      'Neither')))

# Before 1960, drop STATEFIP 2 and 15 (Alaska and Hawaii per the author).
f2 <- e2 %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# Drop the 1980 sample.
f2 <- f2 %>% filter(YEAR != 1980)
# Drop everyone who is in neither generation.
f2 <- f2 %>% filter(Gen != 'Neither')
# Second-generation rows in 1940 and 1950 use the sample-line weight (SLWT);
# all other rows use PERWT. The year test is parenthesised so the generation
# condition applies to both years, matching the other pyramid sections;
# previously every 1940 row, first generation included, received SLWT.
f2 <- f2 %>%
  mutate(Weight = ifelse((YEAR == 1940 | YEAR == 1950) &
                           Gen == 'Second Generation', SLWT, PERWT))

# Weighted count per generation, year, age band and sex.
g2 <- f2 %>%
  group_by(Gen, YEAR, Age, Sex) %>%
  summarise(Number = sum(Weight))
# Negate male counts so they extend to the opposite side of the axis.
h2 <- g2 %>% mutate(Number = ifelse(Sex == 'Male', -1 * Number, Number))
# ---- Figure 5: population pyramid ----
# The plot is built first and the device opened afterwards, so an error while
# constructing the plot cannot leave a PNG device open.
pyramid5 <- ggplot(data = h2, aes(x = Age, y = Number, fill = Sex)) +
  # One bar layer per sex; male counts are already negative in `h2`.
  geom_bar(data = h2[h2$Sex == 'Male', ], stat = 'identity') +
  geom_bar(data = h2[h2$Sex == 'Female', ], stat = 'identity') +
  # Flip so age bands run vertically and counts horizontally.
  coord_flip() +
  # Facet by generation and year.
  # NOTE(review): this formula looks equivalent to `Gen ~ YEAR` - confirm
  # before simplifying.
  facet_grid(Gen~.~YEAR) +
  # Tick labels show absolute values, in millions, on both sides of zero.
  scale_y_continuous(breaks = c(-2000000, -1000000, 0, 1000000, 2000000),
                     labels = c('2', '1', '0', '1', '2')) +
  labs(y = 'Population in Millions',
       title = 'Population Pyramids for Southern and Eastern European Immigrants and their Children') +
  scale_fill_brewer(palette = 'Set1', guide = guide_legend(reverse = TRUE)) +
  # Put the "Sex" title above the legend keys.
  guides(fill = guide_legend(title = 'Sex', title.position = 'top')) +
  theme_bw() + theme(legend.position = 'bottom')

png('population_pyramid_final2.png', height = 500, width = 2000)
# Explicit print(): a bare top-level ggplot object is only drawn by R's
# autoprint, which does not happen when the script is run through source(),
# leaving an empty image.
print(pyramid5)
dev.off()
#FIGURE 6 | |
#running packages | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
# ---- Figure 6: data preparation ----
# Produces `h5`: weighted head counts by generation, year, 10-year age band
# and sex, with male counts negated so they plot to the left of zero.

# Read the extract (described by the author as downloaded from IPUMS).
a3 <- read_csv('Figures462.csv')

# Age-band labels '0-9', '10-19', ..., '70-79', '80+', built in one
# vectorised call instead of growing a vector inside a loop.
agecats <- c('0-9', paste0(1:7, '0-', 1:7, '9'), '80+')

# Label SEX codes 1/2. Explicit levels pin the code-to-label mapping even if
# one code is absent; any other code becomes NA instead of failing.
b3 <- a3 %>% mutate(Sex = factor(SEX, levels = c(1, 2),
                                 labels = c('Male', 'Female')))
# Band index 0-8: decade of age, with everything from 80 upward in band 8.
c3 <- b3 %>% mutate(Age = pmin(floor(AGE / 10), 8))
# Attach the labels; explicit levels keep band i mapped to agecats[i + 1]
# even when a band has no observations.
d3 <- c3 %>% mutate(Age = factor(Age, levels = 0:8, labels = agecats))

# First generation: own birthplace code (BPL) strictly between 500 and 524.
# Second generation: at least one parent (MBPL / FBPL) in that range AND own
# BPL strictly between 1 and 99.
# `&` binds tighter than `|`, so the two parent tests are wrapped in their
# own parentheses. Without them the BPL restriction applied only to the
# father's birthplace.
e3 <- d3 %>%
  mutate(Gen = ifelse(500 < BPL & BPL < 524, 'First Generation Asian',
               ifelse(((500 < MBPL & MBPL < 524) | (500 < FBPL & FBPL < 524)) &
                        (1 < BPL & BPL < 99),
                      'Second Generation Asian',
                      'Neither')))

# Before 1960, drop STATEFIP 2 and 15 (Alaska and Hawaii per the author).
f3 <- e3 %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
# Drop the 1980 sample.
f3 <- f3 %>% filter(YEAR != 1980)
# Drop everyone who is in neither generation.
g3 <- f3 %>% filter(Gen != 'Neither')
# Second-generation rows in 1940 and 1950 use the sample-line weight (SLWT);
# all other rows use PERWT.
f4 <- g3 %>%
  mutate(Weight = ifelse((YEAR == 1940 | YEAR == 1950) &
                           Gen == 'Second Generation Asian', SLWT, PERWT))

# Weighted count per generation, year, age band and sex.
h3 <- f4 %>%
  group_by(Gen, YEAR, Age, Sex) %>%
  summarise(Number = sum(Weight))
# Negate male counts so they extend to the opposite side of the axis.
h5 <- h3 %>% mutate(Number = ifelse(Sex == 'Male', -1 * Number, Number))
# ---- Figure 6: population pyramid ----
# The plot is built first and the device opened afterwards, so an error while
# constructing the plot cannot leave a PNG device open.
pyramid6 <- ggplot(data = h5, aes(x = Age, y = Number, fill = Sex)) +
  # One bar layer per sex; male counts are already negative in `h5`.
  geom_bar(data = h5[h5$Sex == 'Male', ], stat = 'identity') +
  geom_bar(data = h5[h5$Sex == 'Female', ], stat = 'identity') +
  # Flip so age bands run vertically and counts horizontally.
  coord_flip() +
  # Facet by generation and year.
  # NOTE(review): this formula looks equivalent to `Gen ~ YEAR` - confirm
  # before simplifying.
  facet_grid(Gen~.~YEAR) +
  # Tick labels show absolute values, in thousands, on both sides of zero.
  scale_y_continuous(breaks = c(-30000, -20000, -10000, 0, 10000, 20000, 30000),
                     labels = c('30', '20', '10', '0', '10', '20', '30')) +
  labs(y = 'Population in Thousands',
       title = 'Population Pyramids for Asian Immigrants and their Children') +
  scale_fill_brewer(palette = 'Set1', guide = guide_legend(reverse = TRUE)) +
  # Put the "Sex" title above the legend keys.
  guides(fill = guide_legend(title = 'Sex', title.position = 'top')) +
  theme_bw() + theme(legend.position = 'bottom')

png('population_pyramid_final4.png', height = 500, width = 2000)
# Explicit print(): a bare top-level ggplot object is only drawn by R's
# autoprint, which does not happen when the script is run through source(),
# leaving an empty image.
print(pyramid6)
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment