Created
November 16, 2016 10:24
-
-
Save Kavakian9/38c3c7b03e7213bd88548653325a5a92 to your computer and use it in GitHub Desktop.
History 90 Final Project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Data visualizations | |
#1. Animated Population Map of Identifiable Armenians | |
#load packages | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(ggmap) | |
library(maptools) | |
library(gtools) | |
#read in map data | |
mapdata <- read_csv('map.csv') | |
#create map outline | |
map1 <- ggplot() + theme_nothing(legend=TRUE) + | |
geom_polygon(data=mapdata, aes(x=long,y=lat,group=group),fill='white',color='black') | |
png('map.png',width=1500,height=1000) | |
print(map1) | |
dev.off() | |
#read in IPUMS data | |
data <- read_csv('finalproj.csv',col_types=cols(PERWT=col_double(),SLWT=col_double())) | |
#filter by birthplace, mother's birthplace, father's birthplace, mother tongue, and ancestry | |
birthplace <- data %>% filter(BPLD==46540 | MBPLD==46540 | FBPLD==46540 | MTONGUE==28 | MMTONGUE==28 | FMTONGUE==28| LANGUAGE==28 | ANCESTR1==431 | ANCESTR2==431) | |
#weight by PERWT in every year, except for 1940 when sample line variable is used | |
birthplace2 <- birthplace %>% mutate(Weight=ifelse(YEAR==1940,SLWT,PERWT)) | |
#group by YEAR and STATEFIP | |
birthplace3 <- birthplace2 %>% group_by(YEAR,STATEFIP) %>% summarise(Number=sum(Weight)) | |
#quantcut to determine quartiles of data | |
cuts <- quantcut(birthplace3$Number,q=seq(0,1,.1)) | |
table(cuts) | |
#creating number categories for population | |
bplcats <- birthplace3 %>% mutate(Population=factor(ifelse(Number<300,1, | |
ifelse(Number<1000,2, | |
ifelse(Number<2500,3, | |
ifelse(Number<5000,4, | |
ifelse(Number<10000,5,6))))))) | |
levels(bplcats$Population) <- c('1-299','300-999','1000-2,499','2,500-4,999','5,000-9,999','10,000+') | |
table(bplcats$Population) | |
newmap <- mapdata %>% mutate(STATEI=as.integer(STATEFIP)) | |
#join bplcats with newmap | |
amap <- left_join(bplcats,newmap,by=c('STATEFIP'='STATEI')) | |
amap <- amap %>% arrange(order) | |
#map it! | |
map2 <- ggplot() + theme_nothing(legend=TRUE) + scale_fill_brewer(palette='Blues') + | |
geom_polygon(data=mapdata, aes(x=long,y=lat,group=group),fill='white',color='black') + | |
geom_polygon(data=filter(amap,YEAR==1900),aes(x=long,y=lat,group=group,fill=Population),color='black') | |
png('map.png',width=1500,height=1000) | |
print(map2) | |
dev.off() | |
#create for loop for year | |
for (year in unique(amap$YEAR)) { | |
map2 <- map1 + scale_fill_brewer(palette='Blues') + theme_bw(base_size = 24) + | |
geom_polygon(data=filter(amap,YEAR==year),aes(x=long,y=lat,group=group,fill=Population),color='black') + | |
labs(title=paste('Persons identified as Armenian,',year,sep=' ')) | |
png(paste('map_',year,'.png',sep=''),width=1500,height=1000) | |
print(map2) | |
dev.off() | |
} | |
#animate map! | |
install.packages('devtools') | |
library(devtools) | |
devtools::install_github('dgrtwo/gganimate') | |
library(gganimate) | |
anmap <- map2 + scale_fill_brewer(palette='Blues') + | |
geom_polygon(data=amap,aes(x=long,y=lat,group=group,fill=Population,frame=YEAR),color='black') + | |
labs(title='Persons identified as Armenian,') | |
gg_animate(anmap,ani.width=1500,ani.height=1000,'anmap.gif') | |
#2. Clustered Column Graph of Sex Ratio of Identifiable Armenians and Non-Armenians | |
#load packages | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
#read in IPUMS data and account for parsing errors | |
ipums <- read_csv('finalproj.csv',col_types=cols(PERWT=col_double())) | |
#create Worcester city variable | |
worc <- ipums %>% filter(CITY==7570) | |
#create character for Race | |
pop <- worc %>% mutate(Race=ifelse(BPLD==46540 | MBPLD==46540 | FBPLD==46540 | MTONGUE==28 | MMTONGUE==28 | FMTONGUE==28 | LANGUAGE==28 | ANCESTR1==431 | ANCESTR2==431,'Armenian', 'Non-Armenian')) | |
#write .csv file | |
write_csv(worc,'worc.csv') | |
#replace NA with 0 to create character above | |
worc[is.na(worc)] <- 0 | |
#recode SEX | |
sex <- pop %>% mutate(Sex=ifelse(SEX==1,'Male','Female')) | |
#select Year, PERWT, SLWT, Race | |
new <- sex %>% select(YEAR,PERWT,SLWT, Race,Sex) | |
#account for sample line variable in 1940 | |
new1 <- new %>% mutate(Weight=ifelse(YEAR==1940,SLWT,PERWT)) | |
#Group by Year and Race | |
f1 <- new1 %>% group_by(Race,Sex,YEAR) %>% summarise(Number=sum(Weight)) | |
#Export figures to png | |
png('Graph2.png',height=500,width=1000) | |
#graphing | |
ggplot(data=f1,aes(x=YEAR, y=Number,fill=Sex)) + | |
geom_bar(stat='identity',position = 'dodge') + | |
labs(x='Year', y='Population', fill='',title='Population of Armenians and Non-Armenians in Worcester, MA by Year and Sex, 1900-2000') + | |
scale_y_continuous(labels=scales::comma) + | |
scale_x_continuous(breaks=c(1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000)) + | |
scale_fill_brewer(palette='Set2') + | |
facet_wrap(~Race,scales='free_y') + | |
theme_bw() | |
dev.off() | |
#3. Stacked Column Graph of Occupation of Identifiable Armenians and Non-Armenians | |
#load packages | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
#read in IPUMS data & omit non-working age people | |
ipums <- read_csv('finalproj.csv',col_types=cols(PERWT=col_double())) %>% filter(AGE>=15 & AGE<=65) | |
#create Worcester city variable | |
worc <- ipums %>% filter(CITY==7570) | |
#creat a character for identifiable Armenians and non-Armenians | |
pop1 <- worc %>% mutate(Race=ifelse(BPLD==46540 | MBPLD==46540 | FBPLD==46540 | MTONGUE==28 | MMTONGUE==28 | FMTONGUE==28 | LANGUAGE==28 | ANCESTR1==431 | ANCESTR2==431,'Armenian', 'Non-Armenian')) | |
#write .csv file | |
write_csv(worc,'worc.csv') | |
#replace NA with 0 to create character above | |
worc[is.na(worc)] <- 0 | |
#create Occupation variable by recoding OCC1950 | |
occ <- pop1 %>% mutate(Occupation=factor(ifelse(OCC1950 %in% c(980:999), 1, | |
ifelse(OCC1950 %in% c(100) | OCC1950 %in% c(123) | OCC1950 %in% c(810:840) , 2, | |
ifelse(OCC1950 %in% c(500:690) | OCC1950 %in% c(910:979),3, | |
ifelse(OCC1950 %in% c(200:490), 4, | |
ifelse(OCC1950 %in% c(700:790) ,5, | |
ifelse(OCC1950 %in% c(000:099),6,NA)))))), | |
labels=c('None','Farmers/Farm Laborers','Craftsmen/Operatives/Laborers', | |
'Managerial/Clerical/Sales','Service', | |
'Professional'))) | |
#recode SEX | |
sex <- occ %>% mutate(Sex=ifelse(SEX==1,'Male','Female')) | |
#narrow down to variables I need by selecting YEAR, PERWT, SLWT, Race, Occupation, and Sex | |
select <- sex %>% select(YEAR,PERWT,SLWT,Race,Occupation,Sex) | |
#account for sample line variable in 1940 | |
select1 <- select %>% mutate(Weight=ifelse(YEAR==1940,SLWT,PERWT)) | |
#Group by Year, Sex, Race, and Occupation | |
f1 <- select1 %>% group_by(YEAR, Sex, Race,Occupation) %>% summarise(Number=sum(Weight)) | |
#Export figures to png | |
png('Graph3.png',height=500,width=1000) | |
#graph it! | |
ggplot(data=arrange(f1,Occupation),aes(x=YEAR, y=Number,fill=Occupation)) + | |
geom_bar(stat='identity',position='fill') + | |
labs(x='Year', y='Percent of Population', fill='Occupation',title='Occupation of Armenians and Non-Armenians Aged 15-65 by Year and Sex in Worcester, MA, 1900-2000') + | |
scale_y_continuous(labels=scales::percent) + | |
scale_x_continuous(breaks=c(1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000)) + | |
scale_fill_brewer(palette='Set1') + | |
facet_grid(Sex~.~Race) + | |
theme_bw()+ theme(legend.position='bottom') | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment