Skip to content

Instantly share code, notes, and snippets.

@FFFiend
Created April 4, 2024 22:33
Show Gist options
  • Save FFFiend/973e7220d518ef66a79bc08d0615300f to your computer and use it in GitHub Desktop.
Save FFFiend/973e7220d518ef66a79bc08d0615300f to your computer and use it in GitHub Desktop.
# Load required libraries
library(dplyr)
library(ggplot2)
# Set seed for reproducibility of every random draw below
set.seed(123)

# Define hospitals
hospitals <- c(
  "Hospital A", "Hospital B", "Hospital C", "Hospital D", "Hospital E"
)

# Generate synthetic data: expand.grid() builds one row per
# hospital-year combination (5 hospitals x 20 years = 100 rows), and each
# simulated attribute is then drawn independently for every row.
data <- expand.grid(
  Hospital = hospitals,
  Year = 2001:2020
) %>%
  mutate(
    # Simulated number of cancer deaths. A normal draw is unbounded below,
    # so clamp at zero: a negative count is meaningless and would be
    # rejected by the Poisson model fitted later in this script.
    Cancer_Deaths = pmax(round(rnorm(n(), mean = 30, sd = 5)), 0),
    # Simulated cancer type (uniform over the five types)
    Cancer_Type = sample(
      c("Lung", "Breast", "Colorectal", "Prostate", "Pancreatic"),
      n(),
      replace = TRUE
    ),
    # Simulated patient age group (weighted towards "40-60")
    Age_Group = sample(
      c("Under 40", "40-60", "Over 60"),
      n(),
      replace = TRUE,
      prob = c(0.2, 0.5, 0.3)
    ),
    # Simulated patient gender (uniform)
    Gender = sample(c("Male", "Female"), n(), replace = TRUE),
    # Simulated patient socioeconomic status
    Socioeconomic_Status = sample(
      c("Low", "Medium", "High"),
      n(),
      replace = TRUE,
      prob = c(0.3, 0.4, 0.3)
    )
  )
# Plot the graph: yearly cancer deaths, one coloured line per hospital.
# The plot object is passed to print() explicitly because top-level values
# are not auto-printed when a script is run through source(); without it
# the figure would silently not be drawn in that case.
print(
  ggplot(data, aes(x = Year, y = Cancer_Deaths, color = Hospital)) +
    geom_line() +
    labs(
      title = "Number of Cancer Deaths by Hospital (2001-2020)",
      x = "Year",
      y = "Number of Cancer Deaths",
      color = "Hospital"
    )
)
# Sanity checks on the simulated data.
# Scalar results are written inline with cat(). Named results (tables and
# tapply() output) are shown with print() instead, because cat() drops the
# names and would emit bare numbers with no indication of which category
# each value belongs to.

# Count of Unique Hospitals
cat("Test 1: Count of unique hospitals:", length(unique(data$Hospital)), "\n")

# Check for Missing Values in Cancer Deaths
cat(
  "Test 2: Any missing values in cancer deaths column:",
  anyNA(data$Cancer_Deaths), "\n"
)

# Check Unique Cancer Types
cat("Test 3: Unique cancer types:", unique(data$Cancer_Type), "\n")

# Count of Unique Age Groups
cat("Test 4: Count of unique age groups:", length(unique(data$Age_Group)), "\n")

# Proportion of Males and Females
cat("Test 5: Proportion of males and females:\n")
print(table(data$Gender) / nrow(data))

# Check for Missing Values in Socioeconomic Status
cat(
  "Test 6: Any missing values in socioeconomic status column:",
  anyNA(data$Socioeconomic_Status), "\n"
)

# Average Number of Cancer Deaths per Hospital
cat("Test 7: Average number of cancer deaths per hospital:\n")
print(tapply(data$Cancer_Deaths, data$Hospital, mean))

# NOTE(review): Tests 8-10 use table(), which tallies the number of rows
# per category rather than summing Cancer_Deaths within each category.
# Confirm that row counts are the intended "distribution".

# Check Distribution of Cancer Deaths by Cancer Type
cat("Test 8: Distribution of cancer deaths by cancer type:\n")
print(table(data$Cancer_Type))

# Check Distribution of Cancer Deaths by Age Group
cat("Test 9: Distribution of cancer deaths by age group:\n")
print(table(data$Age_Group))

# Check Distribution of Cancer Deaths by Socioeconomic Status
cat("Test 10: Distribution of cancer deaths by socioeconomic status:\n")
print(table(data$Socioeconomic_Status))
library(rstanarm)

# Build Bayesian regression model: Poisson regression (log link) of the
# death count on cancer type, age group, gender and socioeconomic status,
# with normal(0, 2.5) priors on both the coefficients and the intercept.
model <- stan_glm(
  Cancer_Deaths ~ Cancer_Type + Age_Group + Gender + Socioeconomic_Status,
  data = data,
  family = poisson(link = "log"),
  prior = normal(0, 2.5),
  prior_intercept = normal(0, 2.5)
)

# Summary of the model. Printed explicitly so the summary is still shown
# when the script is run through source(), where top-level values are not
# auto-printed.
print(summary(model))
@FFFiend
Copy link
Author

FFFiend commented Apr 4, 2024

generated by ChatGPT^

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment