Skip to content

Instantly share code, notes, and snippets.

@samuelsaari
Last active November 17, 2022 16:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samuelsaari/4b198274aabadf2f6061080973789d7c to your computer and use it in GitHub Desktop.
Save samuelsaari/4b198274aabadf2f6061080973789d7c to your computer and use it in GitHub Desktop.
Latent Class Growth Modelling with multinomial response in R - attempt to solve
#-------------------------------------------------------------------------------------------------
# libraries
rm(list=ls())
# install.packages("TraMineR") # example data
# install.packages("OpenRepGrid")# random words for headings
# install.packages("khroma") # color palletttes
# install.packages("tidyverse")
# install.packages("modeldb")
# install.packages("car")
# install.packages("jtools") # for a nice theme
library(flexmix) # GMM & LCGM
library(TraMineR) # example data
library(OpenRepGrid)# random words for headings
library(khroma) # color palletttes
library(tidyverse)
#library(modeldb)
library(car)
library(jtools) # for a nice theme
library(foreach)
library(ggpubr)
theme_set(theme_nice(base_family = 'Consolas' ))
# parameters
TEST_RUN=T
set.seed(102)
# loading and wrangling data
data(biofam)
biofam
biofam <- biofam %>% rename(gender=sex)
d <- biofam %>% select(.,gender,starts_with('a'),p02r01)
d$id <- 1:nrow(d)
d <- d %>% pivot_longer(.,names_to = "age",cols = starts_with('a'),values_to="rel_stat")
d
d <- d %>% mutate(across(c(gender,rel_stat), as.factor))
d <- d %>% transform(age=str_replace(age,"a",""))
d <- d %>% mutate(age=as.integer(age))
d <- tibble(d)
#d <- d %>% mutate(age=age-14)
d
# 1 single
# 2 married
# 3 child
# 4 divorced
d$rel_stat <- car::recode(d$rel_stat,"0=1;1=1;2=2;3=2;4=3;5=3;6=3;7=4")
#d2 <- d %>% modeldb::add_dummy_variables(rel_stat,auto_values = T)
# making alternative ways to run the model
if (TEST_RUN) {
d <- d %>% filter(p02r01 %in% c("no denomination or religion"))
nr_of_classes <- 3
} else {
nr_of_classes <- 4
}
vector_of_chosen_classes <- 1:nr_of_classes
levels_rel_stat <- 1:4 # note that this is defined manually
AGELOW=15
AGEHIGH=30
NR_OF_YEARS_VECTOR <- AGELOW:AGEHIGH
NR_OF_YEARS=length(NR_OF_YEARS_VECTOR)
#----------------------------------------------------------------------------------
# running Latent class growth modelling
#lcgm_formula <- as.formula(rel_stat~age + I(age^2) + gender + gender:age)
lcgm_formula <- as.formula(rel_stat~ age + I(age^2) +gender + age:gender + I(age^2):gender)
lcgm <- flexmix::flexmix(.~ .| id,
data=d,
k=nr_of_classes, # would be 1:12 in real analysis
#nrep=1, # would be 50 in real analysis to avoid local maxima (and we would use the stepFlexmix function instead)
control = list(iter.max = 500, minprior = 0),
model = flexmix::FLXMRmultinom(lcgm_formula))
#---------------------------------------------------------------------------------
# finding parameters
(parameter_list <- purrr::map(lcgm@components, function(x) x[[1]]@parameters$coef))
# adding class to original data
d$class_orig <- lcgm@cluster
#-----------------------------------------------------------------------------
# Changing the order of the classes - for data
does_custom_order_matrix_exist <- ifelse(nr_of_classes==3,T,F)
if (does_custom_order_matrix_exist) {
(order_matrix <- matrix(c(1,3, #the number indicates where the classes were before...
2,1, #the order indicates were we want to have them a.k.a. new classes
3,2),
ncol=2,byrow=T))
} else {
order_matrix <- matrix(cbind(rep(vector_of_chosen_classes,2)),ncol=2,byrow=F)
}
order_matrix
# helper function to swap the order of the classes
recode_class <- function(ORIG_CLASS,IS_MALE) {
column <- ifelse(IS_MALE,1,2)
vector_of_chosen_classes[ match(ORIG_CLASS,order_matrix[,column]) ] # logic: c(1,2,3)[c(T,F,F)]=1
}
# change order of classses
d <- d %>% mutate(.,class=ifelse(gender=="man",recode_class(class_orig,IS_MALE=TRUE),recode_class(class_orig,IS_MALE=FALSE)))
#-----------------------------------------------------------------------------
# Changing the order of the classes - for parameters and predictions
stratify_coefs_by_gender_helper <- function(COEF_MATRIX,WOMAN_INDICATOR) {
intercept <- COEF_MATRIX[,"(Intercept)"]+WOMAN_INDICATOR*COEF_MATRIX[,"genderwoman"]
age <- COEF_MATRIX[,"age"]+WOMAN_INDICATOR*COEF_MATRIX[,"age:genderwoman"]
age2 <- COEF_MATRIX[,"I(age^2)"] +WOMAN_INDICATOR*COEF_MATRIX[,"I(age^2):genderwoman"]
t(cbind(intercept,age,age2))
}
stratify_coefs_by_gender <- function(COEF_MATRIX) {
list("male_coefs"=stratify_coefs_by_gender_helper(COEF_MATRIX,0), "female_coefs"=stratify_coefs_by_gender_helper(COEF_MATRIX,1))
}
# after this, order original, but seperate estimates for men/women
parameter_list_by_gender_orig_order <- purrr::map(parameter_list,stratify_coefs_by_gender)
parameter_list_by_gender_orig_order
# making a copy of the parameter list that is stratified by gender
parameter_list_by_gender <- parameter_list_by_gender_orig_order
# substituting new values based on the order_matrix (defined above)
for (j in vector_of_chosen_classes) {
for (i in 1:2) {
class_name=paste0('class',i)
coef_gender <- ifelse(i==1,"male_coefs","female_coefs")
column <- ifelse(i==1,1,2)
new_index <- which(order_matrix[,column]==j)
parameter_list_by_gender[[new_index]][[i]] <- parameter_list_by_gender_orig_order[[j]][[i]]
}
}
parameter_list_by_gender_orig_order
parameter_list_by_gender # now the order is changed
# we will predict the values based on all combinations of age, age^2 and gender
prediction_dataframe <- expand.grid(c=1,age=NR_OF_YEARS_VECTOR) # gender is included implicitly (coef_matrices different for both genders)
prediction_dataframe$age2 <- prediction_dataframe$age^2
(prediction_matrix <- as.matrix(prediction_dataframe))
predict_one_gender <- function(COEF_MATRIX,PREDICTION_MATRIX=prediction_matrix) {
#add vector for the number of rel_stats!!!!!!!!!!!!!!! Obs nb!
# https://www.ibm.com/support/pages/compute-predicted-probabilities-multinomial-logistic-new-cases-or-outside-spss
predictions <- PREDICTION_MATRIX %*% COEF_MATRIX
predictions <- exp(predictions)
predictions <- cbind(1,predictions) # adding ref category
predictions <- cbind(predictions,rowSums(predictions))
colnames(predictions) <- c(levels_rel_stat,"row_sums")
probabilities <- apply(predictions[,levels_rel_stat],2,function(x) x/predictions[,"row_sums"] )
return(probabilities)
}
predict_one_class <- function(COEF_MATRIX_LIST,PREDICTION_MATRIX=prediction_matrix) {
male_probabilities <- predict_one_gender(COEF_MATRIX_LIST$male_coefs)
female_probabilities <- predict_one_gender(COEF_MATRIX_LIST$female_coefs)
return(list("male_probabilities"=male_probabilities,"female_probabilities"=female_probabilities))
}
prediction_by_class_and_gender <- purrr::map(parameter_list_by_gender,predict_one_class)
# now we have all the probabilities we need, and will add some info and shape the data
add_gender_to_prediction_matrix <- function(my_list) {
male_probabilities <- cbind(my_list$male_probabilities,gender=1)
female_probabilities <- cbind(my_list$female_probabilities,gender=2)
return(list("male_probabilities"=male_probabilities,"female_probabilities"=female_probabilities))
}
combine_male_and_female_prediction <- function(my_list){
my_list <- purrr::map(my_list,function(x) cbind(x,age=NR_OF_YEARS_VECTOR)) # add age
my_list <- add_gender_to_prediction_matrix(my_list)
combined_matrix <- do.call(rbind,my_list)
return(combined_matrix)
}
prediction_by_class <- purrr::map(prediction_by_class_and_gender,
combine_male_and_female_prediction)
pivot_predictions_to_long <- function(combined_matrix) {
combined_tibble <- setNames(as_tibble(combined_matrix,.name_repair = "minimal"),c(levels_rel_stat,"age","gender"))
long_matrix <- pivot_longer(data=combined_tibble,
cols = -all_of(c("age","gender")),
names_to = "rel_stat",
values_to = "probability")
return(long_matrix)
}
prediction_tibbles_long <- purrr::map(prediction_by_class,
pivot_predictions_to_long)
# now we have the predicted data in the right format.
#--------------------------------------------------------------------------------------------------
# let us confirm that the results would be identical if we fitted the data automatically and not manually as above.
# The order will be different
fitted_lcgm <- fitted(lcgm)
fitted_tibbles <- lapply(fitted_lcgm, function(x) cbind(x,d$age,d$gender))
fitted_tibbles <- lapply(fitted_tibbles,function(x) setNames(as_tibble(x,.name_repair = "minimal"),c(levels_rel_stat,"age","gender") ))
fitted_tibbles_long <-purrr::map(fitted_tibbles, function(x) {
pivot_longer(data=x,cols = -all_of(c("age","gender")), names_to = "rel_stat", values_to = "probability")
} )
fitted_tibbles_long <- purrr::map(fitted_tibbles_long,distinct) # remove duplicate rows
# helpers for plotting
text_size <- 12
title_size <- text_size*1.2
line_size=1.6
line_size_legend <- line_size*1.75
gender_labels <- c(`1`="\U2642",`2`="\U2640")
rw <- function() {
randomWords(nr_of_classes)
}
(class_titles <- paste(vector_of_chosen_classes,"class:",rw(),rw(),rw()))
#--------------------------------------------------------------------------------
# plotting
plot_single_class_line <- function(CLASS_DATA,TITLE){
ggplot(CLASS_DATA, aes(x = age,y = probability,color=rel_stat)) +
geom_line(linewidth=line_size) +
scale_color_vibrant() +
guides(color = guide_legend(reverse = TRUE)) +
# common for both plots +
ggtitle(TITLE) +
labs (x = NULL, y=NULL) +
facet_wrap(~ gender,strip.position="left",labeller=labeller(gender=gender_labels))+
theme(text = element_text(size=text_size),
#legend.key.size = unit(1,"pt"),
legend.title=element_blank(),
plot.title = element_text(size =title_size,margin=margin(b=5)),
strip.text.y.left= element_text(angle=0, size=text_size*1.6))
}
#plot_single_class_line(fitted_tibbles_long[[1]],TITLE=class_titles[1])
# saving the plots
class_plot_list_line<- purrr::map(vector_of_chosen_classes,function(i) plot_single_class_line(fitted_tibbles_long[[i]],TITLE=class_titles[i]))
class_plot_list_line_prediction <- purrr::map(vector_of_chosen_classes,function(i) plot_single_class_line(prediction_tibbles_long[[i]],TITLE=class_titles[i]))
# arranging the plot lists with ggarrange
(plot_lcgm_line <- do.call(ggpubr::ggarrange,c(class_plot_list_line, list(common.legend = TRUE, legend = "bottom",ncol=1))))
(plot_lcgm_line <- do.call(ggpubr::ggarrange,c(class_plot_list_line, list(common.legend = TRUE, legend = "bottom",ncol=1))))
ggpubr::annotate_figure(plot_lcgm_line, top =text_grob("Original model", size = 20,face="bold"))
(plot_lcgm_line_prediction <- do.call(ggpubr::ggarrange,c(class_plot_list_line_prediction, list(common.legend = TRUE, legend = "bottom",ncol=1))))
ggpubr::annotate_figure(plot_lcgm_line_prediction, top =text_grob("Label switching corrected", size = 20,face="bold"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment