## prodhimanisha/Figure1.r

## Figure1.r
#Loading packages
library(magrittr)
library(dplyr)
library(readr)
library(plotly)
library(ggplot2)
library(RColorBrewer)
library(tidyr)

#Reading in IPUMS data (pre-filtered to exclude EMPSTAT!=1 and RACE==1)
density <- read_csv('density.csv')

#Grouping by year, sex, race, and both NPBOSS scores
#(this grouping is carried through the mutate/filter steps below until it is replaced further down)
dg <- density %>% group_by(YEAR, SEX, RACE, NPBOSS50, NPBOSS90)

#Recoding focus group population variables to classify males, immigrant females and non-immigrant females
#SEX==1 is labelled 'Male'; every other row is split on CITIZEN (1 or 2 -> 'Non-Immigrant Female', anything else -> 'Immigrant Female')
#A missing SEX, or a missing CITIZEN on a non-male row, yields an NA label
#NOTE(review): check the CITIZEN code meanings against the IPUMS codebook - TODO confirm 1 and 2 are the intended non-immigrant categories
data <- dg %>%
  mutate(Subpopulation = ifelse(SEX == 1, 'Male', ifelse(CITIZEN == 1 | CITIZEN == 2, 'Non-Immigrant Female', 'Immigrant Female')))

#Homogenizing occupational status score variable by assigning year-appropriate score values to the respective year's data
#Years 1930-1959 take NPBOSS50, all other years take NPBOSS90; rows whose score is above 100 or missing are dropped
npdata <- data %>%
  mutate(Score = as.double(ifelse(YEAR >= 1930 & YEAR < 1960, NPBOSS50, NPBOSS90))) %>%
  filter(Score <= 100)

#Creating dataset to store aggregate denominator values
#Total is the summed PERWT per year and subpopulation, taken from `data`, i.e. before the Score filter above
totals <- data %>%
  group_by(YEAR, Subpopulation) %>%
  summarise(Total = sum(PERWT))

#Keeping only variables relevant for the graphs
density <- npdata %>% select(YEAR, Subpopulation, Score, PERWT, SEX, RACE, NPBOSS50, NPBOSS90)
density <- density %>% group_by(YEAR, Subpopulation, Score)

#Excluding the year 1960
dens <- density %>% filter(YEAR != 1960)
#Attaching the denominators; the join keys are spelled out so the match does not depend on
#whichever column names the two tables happen to share
dens <- left_join(dens, totals, by = c('YEAR', 'Subpopulation'))

#Creating stacked density plots for population distribution in each subpopulation category per Nam-Powers-Boyd score for each year
#Rows are weighted by PERWT/Total, y uses the density stat's `scaled` variable, and there is one facet per YEAR laid out in two columns
#Only named aesthetics are passed to aes(); the data frame is supplied once, through `data=`
densplot <- ggplot(data = dens, aes(x = Score, y = ..scaled.., weight = (PERWT / Total), fill = Subpopulation)) +
  geom_density(size = 0.6, position = 'stack') +
  facet_wrap(~YEAR, ncol = 2) +
  labs(fill = ' ', x = 'Nam-Powers-Boyd Score', y = 'Density', title = 'Weighted Density Distribution, Non-White Population') +
  theme_bw() +
  theme(axis.title.x = element_text(vjust = -0.0002), axis.title.y = element_text(hjust = 0.00001))
#Converting the ggplot into a plotly figure and placing its legend at x=0.55, y=0.075
ggplotly(densplot) %>% layout(legend = list(x = 0.55, y = 0.075))
	#Loading packages
	library(magrittr)
	library(dplyr)
	library(readr)
	library(plotly)
	library(ggplot2)
	library(RColorBrewer)
	library(tidyr)

	#Reading in IPUMS data (pre-filtered to exclude EMPSTAT!=1 and RACE==1)
	density <- read_csv('density.csv')

	#Grouping by year, sex, race, and both NPBOSS scores
	#(this grouping is carried through the mutate/filter steps below until it is replaced further down)
	dg <- density %>% group_by(YEAR, SEX, RACE, NPBOSS50, NPBOSS90)

	#Recoding focus group population variables to classify males, immigrant females and non-immigrant females
	#SEX==1 is labelled 'Male'; every other row is split on CITIZEN (1 or 2 -> 'Non-Immigrant Female', anything else -> 'Immigrant Female')
	#A missing SEX, or a missing CITIZEN on a non-male row, yields an NA label
	#NOTE(review): check the CITIZEN code meanings against the IPUMS codebook - TODO confirm 1 and 2 are the intended non-immigrant categories
	data <- dg %>%
	  mutate(Subpopulation = ifelse(SEX == 1, 'Male', ifelse(CITIZEN == 1 | CITIZEN == 2, 'Non-Immigrant Female', 'Immigrant Female')))

	#Homogenizing occupational status score variable by assigning year-appropriate score values to the respective year's data
	#Years 1930-1959 take NPBOSS50, all other years take NPBOSS90; rows whose score is above 100 or missing are dropped
	npdata <- data %>%
	  mutate(Score = as.double(ifelse(YEAR >= 1930 & YEAR < 1960, NPBOSS50, NPBOSS90))) %>%
	  filter(Score <= 100)

	#Creating dataset to store aggregate denominator values
	#Total is the summed PERWT per year and subpopulation, taken from `data`, i.e. before the Score filter above
	totals <- data %>%
	  group_by(YEAR, Subpopulation) %>%
	  summarise(Total = sum(PERWT))

	#Keeping only variables relevant for the graphs
	density <- npdata %>% select(YEAR, Subpopulation, Score, PERWT, SEX, RACE, NPBOSS50, NPBOSS90)
	density <- density %>% group_by(YEAR, Subpopulation, Score)

	#Excluding the year 1960
	dens <- density %>% filter(YEAR != 1960)
	#Attaching the denominators; the join keys are spelled out so the match does not depend on
	#whichever column names the two tables happen to share
	dens <- left_join(dens, totals, by = c('YEAR', 'Subpopulation'))

	#Creating stacked density plots for population distribution in each subpopulation category per Nam-Powers-Boyd score for each year
	#Rows are weighted by PERWT/Total, y uses the density stat's `scaled` variable, and there is one facet per YEAR laid out in two columns
	#Only named aesthetics are passed to aes(); the data frame is supplied once, through `data=`
	densplot <- ggplot(data = dens, aes(x = Score, y = ..scaled.., weight = (PERWT / Total), fill = Subpopulation)) +
	  geom_density(size = 0.6, position = 'stack') +
	  facet_wrap(~YEAR, ncol = 2) +
	  labs(fill = ' ', x = 'Nam-Powers-Boyd Score', y = 'Density', title = 'Weighted Density Distribution, Non-White Population') +
	  theme_bw() +
	  theme(axis.title.x = element_text(vjust = -0.0002), axis.title.y = element_text(hjust = 0.00001))
	#Converting the ggplot into a plotly figure and placing its legend at x=0.55, y=0.075
	ggplotly(densplot) %>% layout(legend = list(x = 0.55, y = 0.075))