Created
August 31, 2022 16:13
-
-
Save aliaksandrkazlou/6141cf3c030a79e3fedb079fc07cabd2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### Libraries -------------------- | |
library(haven) | |
library(tidyverse) | |
library(sjlabelled) | |
library(zoo) | |
#### Data Prep -------------------- | |
# from https://data.stanford.edu/hcmst | |
df <- read_dta("~/Downloads/HCMST 2017 fresh sample for public sharing draft v1.1.dta") | |
# select only whose who broke up | |
df_tmp <- df %>% | |
select(Q21E_2_Year, Q21E_2_Month, Q21B_2_Year, Q21B_2_Month) %>% | |
filter(Q21E_2_Year >= 0 & Q21B_2_Year >= 0 & Q21E_2_Month >= 0 & Q21B_2_Month >= 0) | |
# data formatting | |
df_tmp$year_met <- get_labels(df_tmp$Q21B_2_Year)[df_tmp$Q21B_2_Year] | |
df_tmp$year_break_up <- get_labels(df_tmp$Q21E_2_Year)[df_tmp$Q21E_2_Year] | |
df_tmp$date_met <- as.yearmon(paste(df_tmp$year_met, df_tmp$Q21B_2_Month), "%Y %m") | |
df_tmp$date_break_up <- as.yearmon(paste(df_tmp$year_break_up, df_tmp$Q21E_2_Month), "%Y %m") | |
# relationships duration | |
df_tmp$rl_years <- floor(as.numeric(difftime(as.Date(df_tmp$date_break_up), as.Date(df_tmp$date_met), unit="weeks"))/52.25) | |
median(df_tmp$rl_years) | |
mean(df_tmp$rl_years) | |
#### Simulation -------------------- | |
n <- 8 | |
leo_age_pool <- c(18, 20, 23, 22, 20, 25, 24, 20) | |
age_lim <- seq(25, 50, 1) | |
rs <- c() | |
for (i in 1:length(age_lim)) { | |
rs_j <- c() | |
for (j in 1:100000) { | |
# sample partners' age from Leo's pool | |
age_sample <- sample(age, n, replace = TRUE) | |
# sample relationships duration from a pool of normal human beings | |
rl_years_sample <- sample(df_tmp$rl_years, n) | |
# calculate age of breakup | |
age_breakup <- age_sample + rl_years_sample | |
rs_j[j] <- ifelse(all(age_breakup <= age_lim[i]), 1, 0) | |
} | |
rs[i] <- mean(rs_j) | |
} | |
df_rs <- data.frame(x = age_lim, y = rs) | |
# prob is 0.03372 | |
df_rs %>% filter(age_lim == 25) | |
ggplot(df_rs, aes(x = x, y = y)) + | |
geom_line(color="red") + | |
labs(y = "Верагоднасць", | |
x = "Узрост партнёра", | |
title = "Верагоднасць разыйсціся з 8 партнёрамі запар, пакуль іхні ўзрост <= X") + | |
theme_bw() + | |
theme(axis.text=element_text(size=12), | |
axis.title=element_text(size=14,face="bold")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment