# Created
# April 4, 2024 22:33
# -
# -
# Save FFFiend/973e7220d518ef66a79bc08d0615300f to your computer and use it in GitHub Desktop.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Load required libraries
library(dplyr)
library(ggplot2)
library(rstanarm)
# Set seed for reproducibility
set.seed(123)

# Define hospitals
hospitals <- c(
  "Hospital A", "Hospital B", "Hospital C", "Hospital D", "Hospital E"
)

# Generate synthetic data.
# expand.grid() yields one row per Hospital x Year combination
# (5 hospitals x 20 years = 100 rows). Every simulated column is drawn
# independently of the others, one value per row. The order of the random
# draws below determines the generated values for a given seed, so keep the
# columns in this order.
data <- expand.grid(
  Hospital = hospitals,
  Year = 2001:2020
) %>%
  mutate(
    # Simulate number of cancer deaths (normal draw rounded to a whole number)
    Cancer_Deaths = round(rnorm(n(), mean = 30, sd = 5)),
    # Simulate cancer type (uniform over the five types)
    Cancer_Type = sample(
      c("Lung", "Breast", "Colorectal", "Prostate", "Pancreatic"),
      n(),
      replace = TRUE
    ),
    # Simulate patient age group (weighted towards "40-60")
    Age_Group = sample(
      c("Under 40", "40-60", "Over 60"),
      n(),
      replace = TRUE,
      prob = c(0.2, 0.5, 0.3)
    ),
    # Simulate patient gender (uniform)
    Gender = sample(c("Male", "Female"), n(), replace = TRUE),
    # Simulate patient socioeconomic status (weighted towards "Medium")
    Socioeconomic_Status = sample(
      c("Low", "Medium", "High"),
      n(),
      replace = TRUE,
      prob = c(0.3, 0.4, 0.3)
    )
  )
# Plot the graph
# One line per hospital showing its simulated cancer deaths for each year.
deaths_plot <- ggplot(
  data,
  aes(x = Year, y = Cancer_Deaths, color = Hospital)
) +
  geom_line() +
  labs(
    title = "Number of Cancer Deaths by Hospital (2001-2020)",
    x = "Year",
    y = "Number of Cancer Deaths",
    color = "Hospital"
  )

# Print explicitly: a bare ggplot object at top level is only rendered by
# auto-printing, which does not happen when the script is run via source().
print(deaths_plot)
# Sanity-check printouts for the simulated data set.
# Named results (tables, tapply output) are shown with print() rather than
# passed to cat(), because cat() drops the names and would emit bare numbers
# with no indication of which category each one belongs to.

# Count of Unique Hospitals
cat(
  "Test 1: Count of unique hospitals:",
  length(unique(data$Hospital)),
  "\n"
)

# Check for Missing Values in Cancer Deaths
cat(
  "Test 2: Any missing values in cancer deaths column:",
  anyNA(data$Cancer_Deaths),
  "\n"
)

# Check Unique Cancer Types
cat("Test 3: Unique cancer types:", unique(data$Cancer_Type), "\n")

# Count of Unique Age Groups
cat(
  "Test 4: Count of unique age groups:",
  length(unique(data$Age_Group)),
  "\n"
)

# Proportion of Males and Females
cat("Test 5: Proportion of males and females:\n")
print(table(data$Gender) / nrow(data))

# Check for Missing Values in Socioeconomic Status
cat(
  "Test 6: Any missing values in socioeconomic status column:",
  anyNA(data$Socioeconomic_Status),
  "\n"
)

# Average Number of Cancer Deaths per Hospital
cat("Test 7: Average number of cancer deaths per hospital:\n")
print(tapply(data$Cancer_Deaths, data$Hospital, mean))

# NOTE: Tests 8-10 tabulate the number of rows (hospital-year records) in
# each category; they do not sum Cancer_Deaths within the category.

# Check Distribution of Cancer Deaths by Cancer Type
cat("Test 8: Distribution of cancer deaths by cancer type:\n")
print(table(data$Cancer_Type))

# Check Distribution of Cancer Deaths by Age Group
cat("Test 9: Distribution of cancer deaths by age group:\n")
print(table(data$Age_Group))

# Check Distribution of Cancer Deaths by Socioeconomic Status
cat("Test 10: Distribution of cancer deaths by socioeconomic status:\n")
print(table(data$Socioeconomic_Status))
# Build Bayesian regression model
# Poisson regression (log link) of Cancer_Deaths on the four categorical
# attributes. The same normal(0, 2.5) prior is given for the coefficients and
# for the intercept. Requires rstanarm, loaded with the other libraries at the
# top of the script.
model <- stan_glm(
  Cancer_Deaths ~ Cancer_Type + Age_Group + Gender + Socioeconomic_Status,
  data = data,
  family = poisson(link = "log"),
  prior = normal(0, 2.5),
  prior_intercept = normal(0, 2.5)
)

# Summary of the model
# Printed explicitly so the summary also appears when the script is run via
# source(), where top-level values are not auto-printed.
print(summary(model))
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment
# generated by ChatGPT^