## NunoSempere/ssc_ea_mental_health_2020_analysis.R

## ssc_ea_mental_health_2020_analysis.R
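## Description: Tallies formally diagnosed and self-suspected mental illnesses
##   among self-identified EAs in the 2020 SlateStarCodex survey, prints the
##   rates, and saves a stacked bar chart of them to "Q5and6.png".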
## Libraries
library(ggplot2)
## Helpers
## Render a fraction (e.g. 0.18) as a percentage string with one decimal
## place (e.g. "18.0%"). Vectorised over `float`.
formatAsPercent <- function(float) {
  percentage_points <- float * 100
  sprintf("%0.1f%%", percentage_points)
}

## Body
## Load the public 2020 SSC survey export and keep only the respondents who
## answered "Yes" to the EAID question.
data <- read.csv("2020ssc_public.csv", header = TRUE, stringsAsFactors = FALSE)
## %in% never yields NA (unlike ==), so rows with a missing EAID are dropped
## instead of being kept as all-NA rows that would inflate n.
data_EAs <- data[data[, "EAID"] %in% "Yes", ]
## View() opens a data viewer; only call it in an interactive session so the
## script can also be run non-interactively.
if (interactive()) {
  View(data_EAs)
}
n <- nrow(data_EAs)
n ## 993 EAs answered the survey.

## Columns 56:67 are used as the mental-illness questions.
## NOTE(review): positional selection depends on the CSV column order - confirm.
List_of_Mental_Illnesses <- colnames(data)[56:67]

answer_diagnosed <- "I have a formal diagnosis of this condition"
answer_suspected <- "I think I might have this condition, although I have never been formally diagnosed"

## Print one CSV-like row per illness: name, share diagnosed, share suspected.
## The header ends with a newline so the first row starts on its own line.
cat("Illness, rate diagnosed, rate suspected\n")
for (illness in List_of_Mental_Illnesses) {
  answers <- data_EAs[[illness]]
  ## na.rm = TRUE: a single NA answer must not turn the whole count into NA.
  rate_diagnosed <- sum(answers == answer_diagnosed, na.rm = TRUE) / n
  rate_suspected <- sum(answers == answer_suspected, na.rm = TRUE) / n
  cat(paste(illness, formatAsPercent(rate_diagnosed), formatAsPercent(rate_suspected), sep = ", "), "\n")
}

## Processing
## Count, for each illness column, how many EA respondents report a formal
## diagnosis, and express that count as a percentage of all n EA respondents.
## Resulting columns: count, type (illness column name), frequency (percent).
diagnosed_answer <- "I have a formal diagnosis of this condition"
tally_diagnosed <- data.frame(
  count = vapply(
    List_of_Mental_Illnesses,
    ## na.rm = TRUE: a single NA answer must not turn the whole count into NA.
    function(illness) sum(data_EAs[[illness]] == diagnosed_answer, na.rm = TRUE),
    integer(1),
    ## Unnamed result, so the data frame keeps default 1..k row names.
    USE.NAMES = FALSE
  ),
  type = List_of_Mental_Illnesses,
  stringsAsFactors = FALSE
)
tally_diagnosed$frequency <- 100 * tally_diagnosed$count / n
tally_diagnosed

## Intuited
## Count, for each illness column, how many EA respondents suspect they have
## the condition without a formal diagnosis, and express that count as a
## percentage of all n EA respondents.
## Resulting columns: count, type (illness column name), frequency (percent).
## Fix: this tally previously matched the "formal diagnosis" answer, which made
## the "Intuited" series an exact copy of the "Diagnosed" one.
suspected_answer <- "I think I might have this condition, although I have never been formally diagnosed"
tally_intuited <- data.frame(
  count = vapply(
    List_of_Mental_Illnesses,
    ## na.rm = TRUE: a single NA answer must not turn the whole count into NA.
    function(illness) sum(data_EAs[[illness]] == suspected_answer, na.rm = TRUE),
    integer(1),
    ## Unnamed result, so the data frame keeps default 1..k row names.
    USE.NAMES = FALSE
  ),
  type = List_of_Mental_Illnesses,
  stringsAsFactors = FALSE
)
tally_intuited$frequency <- 100 * tally_intuited$count / n
tally_intuited

## Together

## Label each tally with its series name and stack both into one long table
## (columns: count, type, frequency, diagnosed).
tally_diagnosed$diagnosed <- "Diagnosed"
tally_intuited$diagnosed <- "Intuited"

tally_mental_health <- rbind(tally_diagnosed, tally_intuited)

## Plotting
## NOTE(review): the title/subtitle text contains typos ("ilnesses",
## "self-indentified"); wording is left as-is here.
titulo <- "Prevalence of mental health ilnesses in EAs"
## Report the actual sample size instead of a hard-coded 993.
subtitulo <- paste0(
  "using self-indentified EAs who answered the 2020 SlateStarCodex survey (n=",
  n, ")"
)

## Illnesses sorted ascending by combined (diagnosed + intuited) count; this
## relies on both tallies listing illnesses in List_of_Mental_Illnesses order.
illness_order <- List_of_Mental_Illnesses[
  order(tally_diagnosed$count + tally_intuited$count)
]

mental_health_plot <- ggplot(
  data = tally_mental_health,
  aes(x = type, y = frequency, fill = diagnosed)
) +
  ## geom_col() draws bars from pre-computed heights; it replaces the former
  ## geom_histogram(stat = "identity").
  geom_col(position = position_stack(reverse = TRUE)) +
  ## Named values tie each colour to its series regardless of level order.
  scale_fill_manual(
    values = c(Diagnosed = "royalblue3", Intuited = "steelblue2")
  ) +
  scale_x_discrete(limits = illness_order) +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  coord_flip() +
  labs(
    title = titulo,
    subtitle = subtitulo,
    x = NULL,
    y = "Frequency (%)",
    caption = "@NunoSempere"
  ) +
  theme(
    legend.title = element_blank(),
    plot.subtitle = element_text(hjust = 0.5),
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
## Top-level reference so the plot is still displayed when auto-printing.
mental_health_plot

## 5in tall, golden-ratio aspect.
height <- 5
width <- height * (1 + sqrt(5)) / 2

## Pass the plot explicitly rather than relying on last_plot().
ggsave("Q5and6.png", plot = mental_health_plot, units = "in", width = width, height = height, dpi = 800)
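	## Description: NOTE(review): everything from here to the end of the file is
	##   a tab-indented duplicate of the script above, so running the file
	##   executes the same analysis twice. Consider keeping only one copy.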
	## Libraries
	library(ggplot2)
	## Helpers
	## Render a fraction (e.g. 0.18) as a percentage string with one decimal
	## place (e.g. "18.0%"). Vectorised over `float`.
	formatAsPercent <- function(float) {
	  percentage_points <- float * 100
	  sprintf("%0.1f%%", percentage_points)
	}

	## Body
	## Load the public 2020 SSC survey export and keep only the respondents who
	## answered "Yes" to the EAID question.
	data <- read.csv("2020ssc_public.csv", header = TRUE, stringsAsFactors = FALSE)
	## %in% never yields NA (unlike ==), so rows with a missing EAID are dropped
	## instead of being kept as all-NA rows that would inflate n.
	data_EAs <- data[data[, "EAID"] %in% "Yes", ]
	## View() opens a data viewer; only call it in an interactive session so the
	## script can also be run non-interactively.
	if (interactive()) {
	  View(data_EAs)
	}
	n <- nrow(data_EAs)
	n ## 993 EAs answered the survey.

	## Columns 56:67 are used as the mental-illness questions.
	## NOTE(review): positional selection depends on the CSV column order - confirm.
	List_of_Mental_Illnesses <- colnames(data)[56:67]

	answer_diagnosed <- "I have a formal diagnosis of this condition"
	answer_suspected <- "I think I might have this condition, although I have never been formally diagnosed"

	## Print one CSV-like row per illness: name, share diagnosed, share suspected.
	## The header ends with a newline so the first row starts on its own line.
	cat("Illness, rate diagnosed, rate suspected\n")
	for (illness in List_of_Mental_Illnesses) {
	  answers <- data_EAs[[illness]]
	  ## na.rm = TRUE: a single NA answer must not turn the whole count into NA.
	  rate_diagnosed <- sum(answers == answer_diagnosed, na.rm = TRUE) / n
	  rate_suspected <- sum(answers == answer_suspected, na.rm = TRUE) / n
	  cat(paste(illness, formatAsPercent(rate_diagnosed), formatAsPercent(rate_suspected), sep = ", "), "\n")
	}

	## Processing
	## Count, for each illness column, how many EA respondents report a formal
	## diagnosis, and express that count as a percentage of all n EA respondents.
	## Resulting columns: count, type (illness column name), frequency (percent).
	diagnosed_answer <- "I have a formal diagnosis of this condition"
	tally_diagnosed <- data.frame(
	  count = vapply(
	    List_of_Mental_Illnesses,
	    ## na.rm = TRUE: a single NA answer must not turn the whole count into NA.
	    function(illness) sum(data_EAs[[illness]] == diagnosed_answer, na.rm = TRUE),
	    integer(1),
	    ## Unnamed result, so the data frame keeps default 1..k row names.
	    USE.NAMES = FALSE
	  ),
	  type = List_of_Mental_Illnesses,
	  stringsAsFactors = FALSE
	)
	tally_diagnosed$frequency <- 100 * tally_diagnosed$count / n
	tally_diagnosed

	## Intuited
	## Count, for each illness column, how many EA respondents suspect they have
	## the condition without a formal diagnosis, and express that count as a
	## percentage of all n EA respondents.
	## Resulting columns: count, type (illness column name), frequency (percent).
	## Fix: this tally previously matched the "formal diagnosis" answer, which made
	## the "Intuited" series an exact copy of the "Diagnosed" one.
	suspected_answer <- "I think I might have this condition, although I have never been formally diagnosed"
	tally_intuited <- data.frame(
	  count = vapply(
	    List_of_Mental_Illnesses,
	    ## na.rm = TRUE: a single NA answer must not turn the whole count into NA.
	    function(illness) sum(data_EAs[[illness]] == suspected_answer, na.rm = TRUE),
	    integer(1),
	    ## Unnamed result, so the data frame keeps default 1..k row names.
	    USE.NAMES = FALSE
	  ),
	  type = List_of_Mental_Illnesses,
	  stringsAsFactors = FALSE
	)
	tally_intuited$frequency <- 100 * tally_intuited$count / n
	tally_intuited

	## Together

	## Label each tally with its series name and stack both into one long table
	## (columns: count, type, frequency, diagnosed).
	tally_diagnosed$diagnosed <- "Diagnosed"
	tally_intuited$diagnosed <- "Intuited"

	tally_mental_health <- rbind(tally_diagnosed, tally_intuited)

	## Plotting
	## NOTE(review): the title/subtitle text contains typos ("ilnesses",
	## "self-indentified"); wording is left as-is here.
	titulo <- "Prevalence of mental health ilnesses in EAs"
	## Report the actual sample size instead of a hard-coded 993.
	subtitulo <- paste0(
	  "using self-indentified EAs who answered the 2020 SlateStarCodex survey (n=",
	  n, ")"
	)

	## Illnesses sorted ascending by combined (diagnosed + intuited) count; this
	## relies on both tallies listing illnesses in List_of_Mental_Illnesses order.
	illness_order <- List_of_Mental_Illnesses[
	  order(tally_diagnosed$count + tally_intuited$count)
	]

	mental_health_plot <- ggplot(
	  data = tally_mental_health,
	  aes(x = type, y = frequency, fill = diagnosed)
	) +
	  ## geom_col() draws bars from pre-computed heights; it replaces the former
	  ## geom_histogram(stat = "identity").
	  geom_col(position = position_stack(reverse = TRUE)) +
	  ## Named values tie each colour to its series regardless of level order.
	  scale_fill_manual(
	    values = c(Diagnosed = "royalblue3", Intuited = "steelblue2")
	  ) +
	  scale_x_discrete(limits = illness_order) +
	  scale_y_continuous(labels = function(x) paste0(x, "%")) +
	  coord_flip() +
	  labs(
	    title = titulo,
	    subtitle = subtitulo,
	    x = NULL,
	    y = "Frequency (%)",
	    caption = "@NunoSempere"
	  ) +
	  theme(
	    legend.title = element_blank(),
	    plot.subtitle = element_text(hjust = 0.5),
	    plot.title = element_text(hjust = 0.5),
	    legend.position = "bottom"
	  )
	## Top-level reference so the plot is still displayed when auto-printing.
	mental_health_plot

	## 5in tall, golden-ratio aspect.
	height <- 5
	width <- height * (1 + sqrt(5)) / 2

	## Pass the plot explicitly rather than relying on last_plot().
	ggsave("Q5and6.png", plot = mental_health_plot, units = "in", width = width, height = height, dpi = 800)