Skip to content

Instantly share code, notes, and snippets.

@zacharystansell
Created August 15, 2020 06:45
Show Gist options
  • Save zacharystansell/e181413d20d74e1e7f22f863ca0af672 to your computer and use it in GitHub Desktop.
Save zacharystansell/e181413d20d74e1e7f22f863ca0af672 to your computer and use it in GitHub Desktop.
Big_5_Personality_By_Country
library("tidyverse")
library("data.table")
library("ggplot2")
library("ggpubr")
library("dplyr")
library("magrittr")
library("countrycode")
# Rescale a numeric vector linearly so its minimum maps to 0 and its maximum
# maps to 100.
#
# x:     numeric vector to rescale.
# na.rm: if TRUE (default), missing values are ignored when finding the
#        minimum and maximum and stay NA in the result. If FALSE, any NA in
#        `x` makes the whole result NA (the behaviour before this argument
#        existed).
#
# Returns a numeric vector the same length as `x`. A vector whose non-missing
# values are all equal has zero range and therefore yields NaN (0 / 0).
normalize <- function(x, na.rm = TRUE) {
  # Nothing to scale against: skip min()/max(), which would warn and return
  # infinite bounds here.
  if (length(x) == 0L || all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  100 * (x - lo) / (hi - lo)
}
# Load the raw survey responses, keep plausible submissions from
# well-represented countries, and compute the five trait scores (O, C, E, A, N)
# on a 0-100 scale.
data <- fread(
  # Read data pulled down from site; the file is tab-separated
  "~/PROJECTS/FUN/personality/data-final.csv",
  sep = "\t",
  # na.strings = "NA",
  stringsAsFactors = FALSE
) %>%
  # Drop the columns this analysis does not use. Columns 1-50 are the item
  # responses scored below; 101-110 include the timing, IPC and country fields.
  select(c(1:50, 101:110)) %>%
  # Keep rows with IPC == 1 and short intro / test / end timings.
  # NOTE(review): IPC is presumably "records from this IP address" and the
  # timings presumably seconds -- confirm against the dataset codebook.
  filter(
    IPC == 1,
    as.numeric(as.character(introelapse)) < 60,
    as.numeric(as.character(testelapse)) < 900,
    as.numeric(as.character(endelapse)) < 60
  ) %>%
  group_by(country) %>%
  # Require more than 1000 individuals per country and drop respondents without
  # a country. `&` (not `&&`) because `country != "NONE"` is a per-row vector;
  # `&&` on a vector is an error from R 4.3 onwards.
  filter(n() > 1000 & country != "NONE") %>%
  droplevels() %>%
  # Coerce the 50 item columns to numeric
  mutate(across(1:50, as.numeric)) %>%
  # Calc scores based on https://ipip.ori.org/new_ipip-50-item-scale.htm
  # Positively keyed items are added, reverse-keyed items subtracted.
  # NOTE(review): the data is still grouped by country here, so normalize()
  # rescales each trait within each country rather than across the whole
  # sample -- confirm that this is the intended scale for comparing countries.
  mutate(
    O = normalize(OPN1 - OPN2 + OPN3 - OPN4 + OPN5 - OPN6 + OPN7 + OPN8 + OPN9 + OPN10),
    C = normalize(CSN1 - CSN2 + CSN3 - CSN4 + CSN5 - CSN6 + CSN7 - CSN8 + CSN9 + CSN10),
    E = normalize(EXT1 - EXT2 + EXT3 - EXT4 + EXT5 - EXT6 + EXT7 - EXT8 + EXT9 - EXT10),
    # AGR1 ("I feel little concern for others") is reverse-keyed, so it is
    # subtracted like AGR3, AGR5 and AGR7.
    A = normalize(-AGR1 + AGR2 - AGR3 + AGR4 - AGR5 + AGR6 - AGR7 + AGR8 + AGR9 + AGR10),
    # EST items are keyed here in the neuroticism direction.
    N = normalize(EST1 - EST2 + EST3 - EST4 + EST5 + EST6 + EST7 + EST8 + EST9 + EST10)
  ) %>%
  # Grouping was only needed for the per-country steps above.
  ungroup()
# Translate ISO 3166 two-letter codes into English country names, then swap a
# few long names for short labels so the plot axis stays readable.
data$country <- countrycode(
  sourcevar = data$country,
  origin = "iso2c",
  destination = "country.name"
)
data <- data %>%
  mutate(
    country = recode(
      country,
      !!!c(
        "Hong Kong SAR China" = "Hong Kong",
        "United States" = "U.S.",
        "United Kingdom" = "U.K.",
        "United Arab Emirates" = "U.A.E."
      )
    )
  )
# Build one violin plot of a trait score, with one violin per country.
#
# scores: data frame holding a `country` column and the trait score column.
# trait:  name of the score column to plot, as a string (e.g. "O").
# title:  title shown above the plot.
#
# Countries are ordered along the y axis by reorder() applied to the trait
# score; the legend and both axis labels are suppressed because the country
# names already label each violin.
plot_trait_by_country <- function(scores, trait, title) {
  ggplot(scores, aes(
    x = .data[[trait]],
    y = reorder(country, .data[[trait]]),
    fill = country
  )) +
    geom_violin() +
    theme_minimal() +
    theme(legend.position = "none") +
    labs(title = title, x = "", y = "")
}

# One plot per Big Five trait; the object names are used when the panels are
# arranged and saved.
O <- plot_trait_by_country(data, "O", "Openness")
C <- plot_trait_by_country(data, "C", "Conscientiousness")
E <- plot_trait_by_country(data, "E", "Extroversion")
A <- plot_trait_by_country(data, "A", "Agreeableness")
N <- plot_trait_by_country(data, "N", "Neuroticism")
# Place the five trait plots side by side in a single row and write the
# combined figure to OCEAN.png.
ocean_panels <- ggarrange(plotlist = list(O, C, E, A, N), nrow = 1)
ggsave(
  filename = "OCEAN.png",
  plot = ocean_panels,
  width = 15,
  height = 15,
  dpi = 300
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment