# FFFiend/gistQ.R

## gistQ.R
# Load required libraries
library(ggplot2)
library(rstanarm)

# Simulate a small survey: 100 respondents, five categorical columns.
# Every column is an independent draw (with replacement) from its own label set.
set.seed(123)
data <- local({
  n_rows <- 100
  draw <- function(labels) sample(labels, n_rows, replace = TRUE)

  # The columns are drawn in this fixed order; reordering them would change
  # which random numbers each column receives for the seed set above.
  data.frame(
    age_group = draw(c("18-30", "31-45", "46-60", "61+")),
    gender = draw(c("Male", "Female")),
    income_group = draw(c("Low", "Medium", "High")),
    highest_education = draw(c("High School", "Bachelor's", "Master's", "PhD")),
    political_party_support = draw(c("Yes", "No"))
  )
})

# Plot the share of "Yes"/"No" answers within each age group, one panel per
# gender. position = "fill" stacks every bar to height 1, so the y axis shows
# a proportion rather than a count.
# The plot is wrapped in print() because top-level expressions are not
# auto-printed when the script is run through source(); without it the figure
# would be silently skipped in that case.
print(
  ggplot(data, aes(x = age_group, fill = political_party_support)) +
    geom_bar(position = "fill") +
    facet_wrap(~gender) +
    labs(
      title = "Political Party Support by Age Group and Gender",
      x = "Age Group",
      y = "Proportion",
      fill = "Political Party Support"
    )
)

# Encode the response as a factor. For a binomial model with a factor
# response, the FIRST level is treated as "failure" and the remaining level as
# "success", so "No" must come first for the model to estimate the probability
# of support ("Yes"). With the levels the other way round, every coefficient
# would describe the odds of NOT supporting the party.
data$political_party_support <- factor(
  data$political_party_support,
  levels = c("No", "Yes")
)

# Build the model using rstanarm: logistic regression of party support on the
# four demographic predictors (all entered as main effects only).
model <- stan_glm(
  political_party_support ~
    age_group + gender + income_group + highest_education,
  data = data,
  family = binomial(link = "logit")
)

# Print the summary of the model. print() is explicit because a bare
# top-level expression is not auto-printed when the script is source()d.
print(summary(model))

# tests
# Informal sanity checks on the age_group column. Each line prints a label
# followed by the computed value; nothing is asserted, so the output is meant
# to be read by eye.
cat("Age Group:\n")
cat("Test 1: Count of unique age groups:", length(unique(data$age_group)), "\n")
cat("Test 2: Any missing values in age group column:", anyNA(data$age_group), "\n")
cat("Test 3: Unique values in age group column:", unique(data$age_group), "\n")

# cat() on a bare table prints only the counts, so pair each count with the
# name of its age group.
age_counts <- table(data$age_group)
cat("Test 4: Frequency of each age group:\n",
    paste0(names(age_counts), ": ", as.vector(age_counts), collapse = ", "),
    "\n")

cat("Test 5: Does '18-30' appear in age group column:", "18-30" %in% data$age_group, "\n")

# Ages are only known as brackets, so the median is approximated from one
# representative age per bracket: the midpoint of each closed bracket, and 70
# for the open-ended "61+" bracket (an assumption -- adjust if a better figure
# is available). Converting the labels to numbers textually does not work:
# "18-30" would become 18.30 and "61+" would become 6170.
age_midpoints <- c("18-30" = 24, "31-45" = 38, "46-60" = 53, "61+" = 70)
cat("Test 6: Median age of individuals:",
    median(unname(age_midpoints[as.character(data$age_group)])), "\n")

cat("Test 7: More individuals in '31-45' than '46-60':", sum(data$age_group == "31-45") > sum(data$age_group == "46-60"), "\n")
cat("Test 8: Percentage of individuals aged 61+:", mean(data$age_group == "61+") * 100, "%\n")

# min()/max() compare the labels as strings; this gives the intended answer
# only because the four labels happen to sort alphabetically in age order.
cat("Test 9: '18-30' is the youngest, '61+' is the oldest:", "18-30" == min(data$age_group) && "61+" == max(data$age_group), "\n")

# anyDuplicated() returns the index of the first duplicate (0 if none), so
# compare against 0 to report a TRUE/FALSE answer.
cat("Test 10: Any duplicate entries in age group column:", anyDuplicated(data$age_group) > 0, "\n\n")
	# NOTE: a second, tab-indented verbatim copy of the script above used to
	# follow here. It reset the RNG to the same seed, rebuilt the same data
	# frame, redrew the same plot and refit the same Stan model, so it added
	# run time and duplicate console output without changing any result. It was
	# removed; the single copy above is the whole analysis.