Skip to content

Instantly share code, notes, and snippets.

@prodhimanisha
Last active November 16, 2016 21:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save prodhimanisha/ef2a2d734de532859fe85661bb5f0055 to your computer and use it in GitHub Desktop.
Save prodhimanisha/ef2a2d734de532859fe85661bb5f0055 to your computer and use it in GitHub Desktop.
#Loading packages
library(magrittr)
library(dplyr)
library(readr)
library(plotly)
library(ggplot2)
library(RColorBrewer)
library(plot3D)
# Read the IPUMS extract; per the original author it is pre-filtered to
# exclude males.
subsociety <- read_csv('subsoc.csv')

# Keep rows whose SFTYPE code is 0-4 and tag each person as 'Immigrant'
# (CITIZEN code 3, 4 or 5) or 'Non-Immigrant' (any other code).
# Factor levels are spelled out so each label stays tied to its code even if
# one of the codes never occurs in the extract.
subsoc <- subsociety %>%
  filter(SFTYPE %in% 0:4) %>%
  mutate(
    Citi = factor(
      ifelse(CITIZEN %in% c(3, 4, 5), 1, 2),
      levels = c(1, 2),
      labels = c('Immigrant', 'Non-Immigrant')
    )
  )

# Weighted population (sum of PERWT) per year and citizenship group; used as
# the denominator for the shares computed further down.
totpop <- subsoc %>%
  group_by(YEAR, Citi) %>%
  summarise(Number = sum(PERWT)) %>%
  ungroup()
total1 <- totpop %>% select(YEAR, Citi, Number)

# Flag related-subfamily membership: SFTYPE 1-4 is 'Yes', anything else 'No'.
subfam <- subsoc %>%
  mutate(
    SubFam = factor(
      ifelse(SFTYPE %in% 1:4, 1, 2),
      levels = c(1, 2),
      labels = c('Yes', 'No')
    )
  )

# Weighted count per year, subfamily flag and citizenship group. The counts
# are aggregated straight from subfam and the group total (Number) is attached
# once, after summarising, with explicit join keys.
subfrac <- subfam %>%
  group_by(YEAR, SubFam, Citi) %>%
  summarise(subnum = sum(PERWT)) %>%
  ungroup() %>%
  left_join(total1, by = c('YEAR', 'Citi'))

# Percentage of each year/citizenship group falling in each subfamily category.
subpct <- subfrac %>% mutate(pct = subnum * 100 / Number)
# Recode EMPSTAT into an employment-status factor:
# 1 -> 'Employed', 2 -> 'Unemployed', every other code -> 'Not in labour force'.
# Levels are given explicitly so a code missing from the data cannot shift or
# break the labels.
subemp <- subfam %>%
  mutate(
    Employ = factor(
      ifelse(EMPSTAT == 1, 1, ifelse(EMPSTAT == 2, 2, 3)),
      levels = 1:3,
      labels = c('Employed', 'Unemployed', 'Not in labour force')
    )
  )

# Weighted count per year, citizenship group, subfamily flag and employment
# status. subfrac already carries both subnum and Number, so a single join on
# the three categorical keys attaches both denominators without matching on
# the floating-point Number column.
subemp <- subemp %>%
  group_by(YEAR, Citi, SubFam, Employ) %>%
  summarise(NumEmp = sum(PERWT)) %>%
  ungroup() %>%
  left_join(subfrac, by = c('YEAR', 'Citi', 'SubFam'))

# Keep only people in a related subfamily and compute the share of each
# employment status within its year/citizenship/subfamily cell.
# Non-immigrant shares are negated so the two groups extend in opposite
# directions from zero on a shared axis; 1960 is excluded.
empsub <- subemp %>%
  filter(SubFam == 'Yes') %>%
  mutate(emppct = NumEmp / subnum) %>%
  group_by(YEAR, Employ) %>%
  mutate(emppct = ifelse(Citi == 'Non-Immigrant', -1 * emppct, emppct)) %>%
  filter(YEAR != 1960)
# Recode worker class: CLASSWKRD 29 -> 'Unpaid Family Worker', else 'Other'.
# Explicit levels keep the recode valid even when no row has code 29.
clswkr <- subfam %>%
  mutate(
    WorkClass = factor(
      ifelse(CLASSWKRD == 29, 1, 2),
      levels = c(1, 2),
      labels = c('Unpaid Family Worker', 'Other')
    )
  )

# Weighted count per year, citizenship group, subfamily flag and worker
# class, with the matching subfamily totals from subfrac attached.
clswkr <- clswkr %>%
  group_by(YEAR, Citi, SubFam, WorkClass) %>%
  summarise(NumWkr = sum(PERWT)) %>%
  ungroup() %>%
  left_join(subfrac, by = c('YEAR', 'Citi', 'SubFam'))

# Restrict to people in a related subfamily and compute each worker class as
# a fraction of its year/citizenship subfamily total.
wkrsub <- clswkr %>%
  filter(SubFam == 'Yes') %>%
  mutate(wkrpct = NumWkr / subnum) %>%
  group_by(YEAR, WorkClass)
# Recode number of marriages: MARRNO 1 -> 'Once', 2-6 -> 'More Than Once',
# every other code -> 'Other'. Levels are explicit so the labels stay aligned
# when a category is missing from the data.
marrno <- subfam %>%
  mutate(
    TimesMarried = factor(
      ifelse(MARRNO == 1, 1, ifelse(MARRNO %in% 2:6, 2, 3)),
      levels = 1:3,
      labels = c('Once', 'More Than Once', 'Other')
    )
  )

# Weighted count per year, citizenship group, subfamily flag and marriage
# category, with the matching subfamily totals from subfrac attached.
marrno <- marrno %>%
  group_by(YEAR, Citi, SubFam, TimesMarried) %>%
  summarise(NumMarr = sum(PERWT)) %>%
  ungroup() %>%
  left_join(subfrac, by = c('YEAR', 'Citi', 'SubFam'))

# Restrict to people in a related subfamily and compute each marriage
# category as a fraction of its year/citizenship subfamily total.
marrsub <- marrno %>%
  filter(SubFam == 'Yes') %>%
  mutate(marrpctsub = NumMarr / subnum) %>%
  group_by(YEAR, TimesMarried)
# Recode the spouse-linking rule: SPRULE 0 -> 'No Spouse Link',
# 1 or 4 -> 'Follows', 2 or 5 -> 'Precedes', every other code -> 'Other'.
# Levels are explicit so the labels stay aligned when a category is missing.
splink <- subfam %>%
  mutate(
    Splink = factor(
      ifelse(
        SPRULE == 0, 1,
        ifelse(
          SPRULE %in% c(1, 4), 2,
          ifelse(SPRULE %in% c(2, 5), 3, 4)
        )
      ),
      levels = 1:4,
      labels = c('No Spouse Link', 'Follows', 'Precedes', 'Other')
    )
  )

# Weighted count per year, citizenship group and spouse-link category, with
# the year/citizenship population total (Number) attached.
splink <- splink %>%
  group_by(YEAR, Citi, Splink) %>%
  summarise(NumSpl = sum(PERWT)) %>%
  ungroup() %>%
  left_join(total1, by = c('YEAR', 'Citi'))

# Fraction of each year/citizenship group in each spouse-link category.
# Non-immigrant fractions are negated so the two groups extend in opposite
# directions from zero on a shared axis; 1960 is excluded.
spl <- splink %>%
  mutate(splpct = NumSpl / Number) %>%
  mutate(splpct = ifelse(Citi == 'Non-Immigrant', -1 * splpct, splpct)) %>%
  filter(YEAR != 1960)
# The four panels below reference data columns through `~` formulas, which is
# how plotly (>= 4.0) maps columns of the supplied data frame; bare column
# names are looked up in the calling environment and are not found.

# Black outline shared by the bar traces.
bar_outline <- list(line = list(width = 2, color = "black"))

# Subgraph 1: employment status by year. empsub stores non-immigrant shares
# as negative values, so the two groups sit on opposite sides of zero; the
# tick labels show absolute percentages.
p1 <- plot_ly(
  filter(empsub, Employ == 'Employed'),
  x = ~YEAR, y = ~emppct * 100,
  type = 'bar', name = 'Employed', marker = bar_outline
) %>%
  add_trace(
    data = filter(empsub, Employ == 'Unemployed'),
    x = ~YEAR, y = ~emppct * 100,
    type = 'bar', name = 'Unemployed', marker = bar_outline
  ) %>%
  add_trace(
    data = filter(empsub, Employ == 'Not in labour force'),
    x = ~YEAR, y = ~emppct * 100,
    type = 'bar', name = 'Not in Labour Force', marker = bar_outline
  ) %>%
  layout(
    title = 'Employment Status',
    xaxis = list(title = 'Year'),
    yaxis = list(
      tickmode = 'array',
      tickvals = c(-80, -60, -40, -20, 0, 20, 40, 60, 80),
      ticktext = c('80', '60', '40', '20', '0', '20', '40', '60', '80')
    ),
    barmode = 'relative', height = 900, width = 750, autosize = FALSE
  )
print(p1)

# Subgraph 2: unpaid family workers by year (1930 and 1960 excluded).
# Bars are coloured red for immigrants and purple for non-immigrants.
p2 <- plot_ly(
  filter(wkrsub, WorkClass == 'Unpaid Family Worker' & !(YEAR %in% c(1930, 1960))),
  x = ~YEAR, y = ~wkrpct * 100,
  showlegend = FALSE, type = 'bar', name = 'Unpaid Family Worker',
  marker = list(
    color = ~ifelse(Citi == 'Immigrant', "red", "purple"),
    line = list(width = 2, color = "black")
  )
) %>%
  layout(
    title = 'Unpaid Family Workers',
    xaxis = list(title = 'Year'),
    yaxis = list(title = '% Non-White Females with Related Subfamily'),
    barmode = 'stack', height = 900, width = 750, autosize = FALSE
  )
print(p2)

# Subgraph 3: marriage frequency (1930, 1960, 1990 and 2000 excluded).
# Stacked bars show immigrants married once / more than once; the line shows
# non-immigrants married more than once.
marr_years_dropped <- c(1930, 1960, 1990, 2000)
p3 <- plot_ly(
  # marrsub is grouped by YEAR and TimesMarried; ungroup so the line trace is
  # drawn as one connected series rather than one piece per group.
  marrsub %>%
    ungroup() %>%
    filter(Citi == 'Non-Immigrant' & TimesMarried == 'More Than Once' & !(YEAR %in% marr_years_dropped)),
  x = ~YEAR, y = ~marrpctsub * 100,
  type = 'scatter', mode = 'lines+markers',
  name = 'Non-Immigrant, Married <br> More Than Once',
  # NOTE(review): 'width' does not look like a marker attribute (possibly
  # meant as a line width) -- kept as in the original; confirm intent.
  marker = list(size = 8, color = "red", width = 5)
) %>%
  # NOTE(review): 'mode' looks like a scatter-only attribute; stacking of the
  # bars presumably comes from barmode = 'stack' in layout() -- kept as-is.
  add_trace(
    data = filter(marrsub, Citi == 'Immigrant' & TimesMarried == 'More Than Once' & !(YEAR %in% marr_years_dropped)),
    x = ~YEAR, y = ~marrpctsub * 100,
    type = 'bar', name = 'More Than Once', marker = bar_outline, mode = 'stack'
  ) %>%
  add_trace(
    data = filter(marrsub, Citi == 'Immigrant' & TimesMarried == 'Once' & !(YEAR %in% marr_years_dropped)),
    x = ~YEAR, y = ~marrpctsub * 100,
    type = 'bar', name = 'Once', marker = bar_outline, mode = 'stack'
  ) %>%
  layout(
    title = 'Matrimonial Liberty: In Numbers',
    xaxis = list(title = 'Year'),
    yaxis = list(title = '% Non White Females with Related Subfamily'),
    barmode = 'stack', height = 900, width = 750, autosize = FALSE
  )
print(p3)

# Subgraph 4: spouse-link categories as horizontal bars. spl stores
# non-immigrant fractions as negative values, so the groups extend to
# opposite sides of zero; tick labels show absolute percentages.
p4 <- plot_ly(
  filter(spl, Splink == 'No Spouse Link'),
  y = ~YEAR, x = ~splpct * 100,
  type = 'bar', orientation = 'h', name = 'No Spouse Linked', marker = bar_outline
) %>%
  add_trace(
    data = filter(spl, Splink == 'Follows'),
    y = ~YEAR, x = ~splpct * 100,
    type = 'bar', orientation = 'h', name = 'Follows Spouse', marker = bar_outline
  ) %>%
  add_trace(
    data = filter(spl, Splink == 'Precedes'),
    y = ~YEAR, x = ~splpct * 100,
    type = 'bar', orientation = 'h', name = 'Precedes Spouse', marker = bar_outline
  ) %>%
  layout(
    title = 'Spouses and Subsocieties',
    titlefont = list(size = 18, color = 'black'),
    xaxis = list(
      tickmode = 'array',
      tickvals = c(-50, 0, 50),
      ticktext = c('50', '0', '50')
    ),
    barmode = 'relative', height = 900, width = 750, autosize = FALSE,
    legend = list(x = 1.05, y = 0.5)
  )
print(p4)
# Build one annotation positioned in paper coordinates with no arrow.
# x, y: position as a fraction of the figure; text: label to draw;
# ...: any further annotation attributes (font, textangle, ...).
paper_note <- function(x, y, text, ...) {
  list(
    x = x, y = y, text = text,
    showarrow = FALSE, xref = 'paper', yref = 'paper',
    ...
  )
}

# Font used for the four panel captions.
panel_font <- list(size = 13, color = 'black')

# Combine the four subgraphs into a 2 x 2 grid. Axis titles are switched off
# in subplot() and re-created below as manually positioned annotations.
DataVis2Plots <- subplot(
  p1, p2, p3, p4,
  nrows = 2, margin = 0.07,
  shareX = FALSE, titleX = FALSE, shareY = FALSE, titleY = FALSE
) %>%
  layout(annotations = list(
    # Panel captions.
    # NOTE(review): 'margin' on the first two captions is carried over from
    # the original; it may not be a recognised annotation attribute -- confirm.
    paper_note(0.05, 1, "Subgraph 1: Employment Status", margin = 0.1, font = panel_font),
    paper_note(1.02, 1, "Subgraph 2: Unpaid Family Workers", margin = 0.1, font = panel_font),
    paper_note(0.005, 0.485, "Subgraph 3: Matrimonial Liberty", font = panel_font),
    paper_note(1.05, 0.485, "Subgraph 4: Followers & Precedents of Spouse", font = panel_font),
    # Axis, group and colour-key labels.
    paper_note(-0.07, 0.925, '% Immigrant', textangle = -90),
    paper_note(-0.07, 0.725, '% Non-Immigrant', textangle = -90),
    paper_note(0.78, 0.45, 'Non-Immigrant'),
    paper_note(0.95, 0.45, 'Immigrant'),
    paper_note(0.855, -0.04, '% Subpopulation'),
    paper_note(0.5, 0.8, '% Subpopulation', textangle = -90),
    paper_note(0.53, 0.45, 'Year'),
    paper_note(0.82, 0.9, 'Immigrant', font = list(color = 'red')),
    paper_note(0.875, 0.79, 'Non-Immigrant', font = list(color = 'purple')),
    paper_note(-0.07, 0.17, '% Subpopulation', textangle = -90),
    paper_note(0.17, -0.04, 'Year'),
    paper_note(0.175, 0.54, 'Year'),
    paper_note(0.825, 0.54, 'Year')
  ))
print(DataVis2Plots)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment