Created
February 22, 2024 02:25
-
-
Save paulovillarroel/3e626161e6edb0556988cf7a46a69403 to your computer and use it in GitHub Desktop.
Example of how to perform a survival analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load necessary libraries | |
library(tidyverse) # For data manipulation | |
library(survival) # For survival analysis | |
library(survminer) # For survival visualization | |
# Load TidyTuesday data for the 5th week of 2023 | |
data <- tidytuesdayR::tt_load(2023, week = 05) | |
# Extract relevant datasets | |
cats_uk <- data$cats_uk | |
data_reference <- data$cats_uk_reference | |
# Merge datasets based on tag_id | |
df <- cats_uk |> | |
inner_join(data_reference, by = "tag_id") | |
# Prepare data for survival analysis | |
surv_df <- df |> | |
select(timestamp, hunt, animal_sex) |> | |
filter(!is.na(hunt)) |> | |
mutate( | |
animal_sex = ifelse(animal_sex == "m", 1, 2), # Convert animal_sex to numeric | |
hunt = ifelse(hunt == "FALSE", 0, 1), # Convert hunt to numeric | |
timestamp = as.POSIXct(timestamp, format = "%Y-%m-%d %H:%M:%S"), # Convert timestamp to POSIXct | |
day = as.Date(timestamp, "%Y-%m-%d %H:%M:%S", tz = "GMT") # Extract day from timestamp | |
) | |
# Calculate date range | |
range(surv_df$day) | |
# Define start and end dates for analysis | |
start_date <- as.Date("2017-06-03") | |
end_date <- as.Date("2017-11-30") | |
date_seq <- seq(start_date, end_date, by = "day") | |
# Group data by day and calculate time elapsed from start date | |
cat_hz <- surv_df |> | |
group_by(day) |> | |
mutate(time = as.numeric(day - start_date) + 1) |> # Calculate time elapsed | |
arrange(time) # Arrange data by time | |
# Perform survival analysis | |
fit <- survfit(Surv(time, hunt) ~ animal_sex, data = cat_hz) | |
# Generate survival plot | |
ggsurv <- ggsurvplot(fit, | |
data = cat_hz, | |
censor.shape = "|", | |
censor.size = 4, | |
risk.table = TRUE, | |
submain = "Hazards Distribution and Sex Differences in Hunting Risk", | |
caption = "Based on Kaplan-Meier estimates\nDataSource: #TidyTuesday 2023 Week5 Pet Cats UK" | |
) | |
# Customize plot appearance | |
ggsurv$plot %+% | |
ggthemes::scale_colour_fivethirtyeight(labels = c("Male", "Female")) %+% | |
labs(title = "Survival of UK Cats") %+% | |
theme_survminer( | |
base_family = "Roboto Condensed", | |
font.main = c(18, "bold"), | |
font.submain = c(14, "bold.italic"), | |
font.caption = c(11, "plain"), | |
font.x = c(12, "bold.italic"), | |
font.y = c(12, "bold.italic"), | |
font.tickslab = c(12, "plain") | |
) %+% | |
theme( | |
plot.background = element_rect(fill = "grey90", color = "grey90"), | |
panel.background = element_rect(fill = "grey90", color = "grey90"), | |
legend.background = element_blank() | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment