Skip to content

Instantly share code, notes, and snippets.

@Kavakian9
Created November 16, 2016 10:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Kavakian9/38c3c7b03e7213bd88548653325a5a92 to your computer and use it in GitHub Desktop.
Save Kavakian9/38c3c7b03e7213bd88548653325a5a92 to your computer and use it in GitHub Desktop.
History 90 Final Project
#Data visualizations
#1. Animated Population Map of Identifiable Armenians
#load packages
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
#read in map data
mapdata <- read_csv('map.csv')
#create map outline
map1 <- ggplot() + theme_nothing(legend=TRUE) +
geom_polygon(data=mapdata, aes(x=long,y=lat,group=group),fill='white',color='black')
png('map.png',width=1500,height=1000)
print(map1)
dev.off()
#read in IPUMS data
data <- read_csv('finalproj.csv',col_types=cols(PERWT=col_double(),SLWT=col_double()))
#filter by birthplace, mother's birthplace, father's birthplace, mother tongue, and ancestry
birthplace <- data %>% filter(BPLD==46540 | MBPLD==46540 | FBPLD==46540 | MTONGUE==28 | MMTONGUE==28 | FMTONGUE==28| LANGUAGE==28 | ANCESTR1==431 | ANCESTR2==431)
#weight by PERWT in every year, except for 1940 when sample line variable is used
birthplace2 <- birthplace %>% mutate(Weight=ifelse(YEAR==1940,SLWT,PERWT))
#group by YEAR and STATEFIP
birthplace3 <- birthplace2 %>% group_by(YEAR,STATEFIP) %>% summarise(Number=sum(Weight))
#quantcut to determine quartiles of data
cuts <- quantcut(birthplace3$Number,q=seq(0,1,.1))
table(cuts)
#creating number categories for population
bplcats <- birthplace3 %>% mutate(Population=factor(ifelse(Number<300,1,
ifelse(Number<1000,2,
ifelse(Number<2500,3,
ifelse(Number<5000,4,
ifelse(Number<10000,5,6)))))))
levels(bplcats$Population) <- c('1-299','300-999','1000-2,499','2,500-4,999','5,000-9,999','10,000+')
table(bplcats$Population)
newmap <- mapdata %>% mutate(STATEI=as.integer(STATEFIP))
#join bplcats with newmap
amap <- left_join(bplcats,newmap,by=c('STATEFIP'='STATEI'))
amap <- amap %>% arrange(order)
#map it!
map2 <- ggplot() + theme_nothing(legend=TRUE) + scale_fill_brewer(palette='Blues') +
geom_polygon(data=mapdata, aes(x=long,y=lat,group=group),fill='white',color='black') +
geom_polygon(data=filter(amap,YEAR==1900),aes(x=long,y=lat,group=group,fill=Population),color='black')
png('map.png',width=1500,height=1000)
print(map2)
dev.off()
#create for loop for year
for (year in unique(amap$YEAR)) {
map2 <- map1 + scale_fill_brewer(palette='Blues') + theme_bw(base_size = 24) +
geom_polygon(data=filter(amap,YEAR==year),aes(x=long,y=lat,group=group,fill=Population),color='black') +
labs(title=paste('Persons identified as Armenian,',year,sep=' '))
png(paste('map_',year,'.png',sep=''),width=1500,height=1000)
print(map2)
dev.off()
}
#animate map!
install.packages('devtools')
library(devtools)
devtools::install_github('dgrtwo/gganimate')
library(gganimate)
anmap <- map2 + scale_fill_brewer(palette='Blues') +
geom_polygon(data=amap,aes(x=long,y=lat,group=group,fill=Population,frame=YEAR),color='black') +
labs(title='Persons identified as Armenian,')
gg_animate(anmap,ani.width=1500,ani.height=1000,'anmap.gif')
#2. Clustered Column Graph of Sex Ratio of Identifiable Armenians and Non-Armenians
#load packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
#read in IPUMS data and account for parsing errors
ipums <- read_csv('finalproj.csv',col_types=cols(PERWT=col_double()))
#create Worcester city variable
worc <- ipums %>% filter(CITY==7570)
#create character for Race
pop <- worc %>% mutate(Race=ifelse(BPLD==46540 | MBPLD==46540 | FBPLD==46540 | MTONGUE==28 | MMTONGUE==28 | FMTONGUE==28 | LANGUAGE==28 | ANCESTR1==431 | ANCESTR2==431,'Armenian', 'Non-Armenian'))
#write .csv file
write_csv(worc,'worc.csv')
#replace NA with 0 to create character above
worc[is.na(worc)] <- 0
#recode SEX
sex <- pop %>% mutate(Sex=ifelse(SEX==1,'Male','Female'))
#select Year, PERWT, SLWT, Race
new <- sex %>% select(YEAR,PERWT,SLWT, Race,Sex)
#account for sample line variable in 1940
new1 <- new %>% mutate(Weight=ifelse(YEAR==1940,SLWT,PERWT))
#Group by Year and Race
f1 <- new1 %>% group_by(Race,Sex,YEAR) %>% summarise(Number=sum(Weight))
#Export figures to png
png('Graph2.png',height=500,width=1000)
#graphing
ggplot(data=f1,aes(x=YEAR, y=Number,fill=Sex)) +
geom_bar(stat='identity',position = 'dodge') +
labs(x='Year', y='Population', fill='',title='Population of Armenians and Non-Armenians in Worcester, MA by Year and Sex, 1900-2000') +
scale_y_continuous(labels=scales::comma) +
scale_x_continuous(breaks=c(1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000)) +
scale_fill_brewer(palette='Set2') +
facet_wrap(~Race,scales='free_y') +
theme_bw()
dev.off()
#3. Stacked Column Graph of Occupation of Identifiable Armenians and Non-Armenians
#load packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
#read in IPUMS data & omit non-working age people
ipums <- read_csv('finalproj.csv',col_types=cols(PERWT=col_double())) %>% filter(AGE>=15 & AGE<=65)
#create Worcester city variable
worc <- ipums %>% filter(CITY==7570)
#creat a character for identifiable Armenians and non-Armenians
pop1 <- worc %>% mutate(Race=ifelse(BPLD==46540 | MBPLD==46540 | FBPLD==46540 | MTONGUE==28 | MMTONGUE==28 | FMTONGUE==28 | LANGUAGE==28 | ANCESTR1==431 | ANCESTR2==431,'Armenian', 'Non-Armenian'))
#write .csv file
write_csv(worc,'worc.csv')
#replace NA with 0 to create character above
worc[is.na(worc)] <- 0
#create Occupation variable by recoding OCC1950
occ <- pop1 %>% mutate(Occupation=factor(ifelse(OCC1950 %in% c(980:999), 1,
ifelse(OCC1950 %in% c(100) | OCC1950 %in% c(123) | OCC1950 %in% c(810:840) , 2,
ifelse(OCC1950 %in% c(500:690) | OCC1950 %in% c(910:979),3,
ifelse(OCC1950 %in% c(200:490), 4,
ifelse(OCC1950 %in% c(700:790) ,5,
ifelse(OCC1950 %in% c(000:099),6,NA)))))),
labels=c('None','Farmers/Farm Laborers','Craftsmen/Operatives/Laborers',
'Managerial/Clerical/Sales','Service',
'Professional')))
#recode SEX
sex <- occ %>% mutate(Sex=ifelse(SEX==1,'Male','Female'))
#narrow down to variables I need by selecting YEAR, PERWT, SLWT, Race, Occupation, and Sex
select <- sex %>% select(YEAR,PERWT,SLWT,Race,Occupation,Sex)
#account for sample line variable in 1940
select1 <- select %>% mutate(Weight=ifelse(YEAR==1940,SLWT,PERWT))
#Group by Year, Sex, Race, and Occupation
f1 <- select1 %>% group_by(YEAR, Sex, Race,Occupation) %>% summarise(Number=sum(Weight))
#Export figures to png
png('Graph3.png',height=500,width=1000)
#graph it!
ggplot(data=arrange(f1,Occupation),aes(x=YEAR, y=Number,fill=Occupation)) +
geom_bar(stat='identity',position='fill') +
labs(x='Year', y='Percent of Population', fill='Occupation',title='Occupation of Armenians and Non-Armenians Aged 15-65 by Year and Sex in Worcester, MA, 1900-2000') +
scale_y_continuous(labels=scales::percent) +
scale_x_continuous(breaks=c(1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000)) +
scale_fill_brewer(palette='Set1') +
facet_grid(Sex~.~Race) +
theme_bw()+ theme(legend.position='bottom')
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment