Code for Pokemon post-evolution CP model blog post
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########## FUNCTIONS ################### | |
sign_flip <- function(x){ | |
flip <- rbinom(1,1,0.5) | |
x <- ifelse(flip == 0, x, -x) | |
return(x) | |
} | |
sim_CP_new <- function(dat, spec, shape1, shape2, reps = 100){ | |
spec_dat <- filter(dat1, species == eval(spec)) | |
b_spec <- rbeta(10000, shape1, shape2) | |
b_spec <- sapply(b_spec, FUN = sign_flip) | |
b_spec <- b_spec + spec_dat$spec_mean_pdcp[1] | |
sim_spec <- replicate(reps,spec_dat$cp * (1 + sample(b_spec, nrow(spec_dat)))) | |
return(sim_spec) | |
} | |
prep_sim_data_for_plot <- function(sim_spec, spec_dat){ | |
sim_spec <- data.frame(sim_spec, | |
cp_new = spec_dat$cp_new) | |
spec_melt <- reshape2::melt(sim_spec) | |
spec_melt$model <- rep(c("sim","data"), | |
times = c(((ncol(sim_spec)-1)*nrow(sim_spec)) | |
,nrow(sim_spec))) | |
spec_melt$species <- as.character(unique(spec_dat$species)) | |
return(spec_melt) | |
} | |
sim_beta_RMSE <- function(dat, melt, draws_from_beta){ | |
melt <- filter(melt, model == "sim") | |
melt$cp_new <- rep(dat$cp_new, times = draws_from_beta) | |
melt$id <- rep(1:nrow(dat), times = draws_from_beta) | |
beta_rmse <- group_by(melt, id) %>% | |
summarise(means = mean(value), | |
sd = sd(value), | |
rmse = sqrt(mean((cp_new - value)^2)), | |
mae = mean(abs(cp_new - value)), | |
species = eval(unique(as.character(dat$species))), | |
cp_new = mean(cp_new)) %>% | |
data.frame() | |
return(beta_rmse) | |
} | |
sim_CP_new_by_param <- function(dat, spec, param_dist, reps = 100){ | |
shape_sample <- param_dist[sample(nrow(param_dist),1),] | |
shape1 <- shape_sample[1,1] | |
shape2 <- shape_sample[1,2] | |
spec_dat <- filter(dat1, species == eval(spec)) | |
f_spec <- fitdist(abs(spec_dat$cent_spec_mean_pdcp),"beta") | |
b_spec <- rbeta(10000, shape1, shape2) | |
b_spec <- sapply(b_spec, FUN = sign_flip) | |
b_spec <- b_spec + spec_dat$spec_mean_pdcp[1] | |
# simulate cp_new of known pokes under [reps] different beta draws | |
sim_spec <- replicate(reps,spec_dat$cp * (1 + sample(b_spec, nrow(spec_dat))), | |
simplify = TRUE) | |
return(sim_spec) | |
} | |
sim_param_RMSE <- function(dat, melt, param_draws){ | |
melt$cp_new <- rep(dat$cp_new, param_draws) | |
sim_rmse <- group_by(melt, Var1) %>% | |
summarise(means = mean(value), | |
sd = sd(value), | |
rmse = sqrt(mean((cp_new - value)^2)), | |
mae = mean(abs(cp_new - value)), | |
species = eval(unique(as.character(dat$species))), | |
cp_new = mean(cp_new)) %>% | |
data.frame() | |
} | |
prep_psim_for_plot <- function(psim_spec_dat, spec_dat){ | |
psim_spec <- matrix(psim_spec_dat, nrow = nrow(spec_dat), | |
ncol = param_draws * beta_draws, | |
byrow = FALSE) | |
psim_spec_melt <- melt(psim_spec) | |
psim_spec_means <- group_by(psim_spec_melt, Var1) %>% | |
summarise(means = mean(value)) %>% | |
arrange(means) | |
psim_spec_melt$ord <- factor(psim_spec_melt$Var1, levels = psim_spec_means$Var1) | |
spec_dat$id <- seq(1:nrow(spec_dat)) | |
spec_dat$ord <- factor(spec_dat$id, levels = psim_spec_means$Var1) | |
return(list(psim_spec_melt, spec_dat)) | |
} | |
pedict_simulated_cp_new <- function(sim_dat, shape1, shape2, reps = 100){ | |
# replicate a bunch of beta values from parametrized distribution | |
b_spec <- rbeta(10000, shape1, shape2) | |
# use sign_flip to randomly flip sign (p = 0.5) | |
b_spec <- sapply(b_spec, FUN = sign_flip) | |
# add the pos/neg beta (variation) to the mean pdcp for that species | |
b_spec <- b_spec + sim_dat$spec_mean_pdcp[1] | |
# replicate evolutions scenerios sampled from b_spec on simulated pokes | |
sim_spec <- replicate(reps,sim_dat$cp * (1 + sample(b_spec, nrow(sim_dat)))) | |
return(sim_spec) | |
} | |
mround <- function(x,base){ | |
base*round(x/base) | |
} | |
sim_new_species <- function(spec_ranges, spec_dat){ | |
spec <- eval(unique(spec_dat$specie)) | |
spec_cp_range <- data.frame(species = eval(unique(spec_dat$species)), | |
cp = mround(seq(spec_ranges[which(spec_ranges$species == spec),"min_cp"], | |
spec_ranges[which(spec_ranges$species == spec),"max_cp"], | |
length.out = length_out), base), | |
spec_mean_pdcp = spec_dat$spec_mean_pdcp[1]) | |
return(spec_cp_range) | |
} | |
make_pred_cp_plot_dat <- function(predicted_spec, spec_cp_range){ | |
spec_cp_new_ranges <- group_by(melt(predicted_spec), Var1) %>% | |
summarise( | |
cp_new_min = floor(min(value)), | |
cp_new_5 = floor(quantile(value, 0.05)), | |
cp_new_mean = floor(mean(value)), | |
cp_new_95 = ceiling(quantile(value, 0.95)), | |
cp_new_max = ceiling(max(value))) %>% | |
mutate(cp = spec_cp_range$cp) %>% | |
data.frame() | |
return(spec_cp_new_ranges) | |
} | |
########## END FUNCTIONS ###################### | |
########## Require Libraries ################## | |
library("dplyr") # for data wrangling | |
library("corrplot") # for correlation plot | |
library("fitdistrplus") # for distribution GoF | |
library("ggplot2") # for plotting | |
library("ggalt") # for added plotting functions | |
library("broom") # for tidy | |
library("tidyr") # for gather() | |
library("tibble") # for display of data | |
library("xtable") # for output of tables for blog | |
library("ggthemes") # for color-blind safe plot colors | |
library("reshape2") # for melt function | |
########## END Require Libraries ############## | |
setwd("~/Dropbox/R/pokemon/") | |
pokedat <- as_tibble(read.csv("pokemon.csv")) | |
dat1 <- mutate(pokedat, delta_cp = cp_new - cp) %>% # net change in cp | |
# the % change in cp from old to new | |
mutate(pcnt_d_cp = delta_cp / cp) %>% | |
# group by species to calculate additonal varaibles | |
group_by(species) %>% | |
# species grouped mean percent change | |
mutate(spec_mean_pdcp = mean(pcnt_d_cp)) %>% | |
# species grouped std dev of % changes from old to new cp | |
mutate(spec_mean_pdcp_sd = sd(pcnt_d_cp)) %>% | |
# difference in % delta cp (pdcp) change from species group mean | |
mutate(cent_spec_mean_pdcp = pcnt_d_cp - spec_mean_pdcp) %>% | |
# z score for pdcp change within species group | |
mutate(z_spec_mean_pdcp = cent_spec_mean_pdcp / spec_mean_pdcp_sd) %>% | |
data.frame() | |
# Z score to answer the more appropriate question: | |
# what leads to an above average gain in CP per evolution within a given species? | |
print(xtable(table(dat1$species)), type = "html") | |
# plot the full 2k by 2k base pairs() plot for fun | |
# corr plot | |
c2 <- cor(dat1[,c(3:8,11,14:20,23,26,28:33)]) | |
corrplot(c2, method = "square", order = "hclust", | |
tl.col = "black", tl.cex = 1.5) | |
# mean CP & HP Table | |
dat1 %>% | |
group_by(species) %>% | |
summarize(Mean_CP = round(mean(cp),2), | |
Mean_HP = round(mean(hp),2)) %>% | |
xtable(.) %>% | |
print(., type = "html", include.rownames=FALSE) | |
# R and R^2 table | |
dat1 %>% | |
group_by(species) %>% | |
summarize(r = round(cor(cp, hp),3)) %>% | |
mutate(r2 = round(r^2,3)) %>% | |
xtable(.) %>% | |
print(., type = "html", include.rownames=FALSE) | |
# plot linear correlation of CP & HP by species | |
ggplot(data = dat1, aes(x = cp, y = hp, color = species)) + | |
geom_point() + | |
geom_smooth(method = "lm", se = FALSE) + | |
# scale_color_pokemon("infernape") + | |
scale_color_colorblind() + | |
theme_bw() + | |
labs(title="Correlation of Pokémon CP and HP", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "CP", | |
y = "HP") + | |
theme( | |
axis.text.x = element_text(size = 10, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 10, family = "Trebuchet MS"), | |
axis.title = element_text(size = 12, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank() | |
) | |
ggsave("CP_HP_corr.png", width = 6, height = 5) | |
# plot distriubtion of centered PDCP | |
ggplot(dat1, aes(x = cent_spec_mean_pdcp, fill = species)) + | |
geom_density() + | |
geom_vline(xintercept = 0, color = "gray60", linetype = 2) + | |
# scale_fill_pokemon("infernape") + | |
scale_fill_colorblind() + | |
facet_wrap(~species, nrow = 1) + | |
theme_bw() + | |
labs(title="Distribution or Percent Change in CP from Species Mean", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Centered PDCP", | |
y = "Density") + | |
theme( | |
strip.background = element_rect(colour = "white", fill = "white"), | |
strip.text.x = element_text(colour = "black", size = 10, face = "bold", | |
family = "Trebuchet MS"), | |
panel.margin = unit(0, "lines"), | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
legend.position = "none", | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank() | |
) | |
ggsave("centered_PDCP.png", width = 6, height = 5) | |
# resdiuals of centered PDCP | |
ggplot(data = dat1, aes(x = cp, y = cent_spec_mean_pdcp, color = species)) + | |
geom_hline(yintercept = 0, color = "gray60", linetype = 2) + | |
geom_point() + | |
scale_color_colorblind() + | |
facet_wrap(~species, nrow = 2) + | |
theme_bw() + | |
labs(title="Centered Percent Change in CP from Species Mean", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "CP", | |
y = "Density") + | |
theme( | |
strip.background = element_rect(colour = "white", fill = "white"), | |
strip.text.x = element_text(colour = "black", size = 10, face = "bold", | |
family = "Trebuchet MS"), | |
panel.margin = unit(0, "lines"), | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
legend.position = "none", | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank() | |
) | |
ggsave("centered_PDCP_over_CP.png", width = 6, height = 5) | |
# trend of mean species PDCP and variation in PDCP | |
ggplot(data = dat1, aes(x = spec_mean_pdcp, y = spec_mean_pdcp_sd, color = species)) + | |
geom_line(color = "gray65") + | |
geom_point(size = 4) + | |
scale_color_colorblind() + | |
theme_bw() + | |
labs(title="Relationship if Mean and Variation in PDCP by Species", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Mean PDCP", | |
y = "Std Dev of PDCP") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("mean_vs_stdDev_PDCP.png", width = 6, height = 5) | |
# look for universal deviation from mean | |
quantile(dat1$cent_spec_mean_pdcp) # difference from mean | |
# distrobution of Centered PDCP for all species | |
ggplot(dat1, aes(x = cent_spec_mean_pdcp)) + | |
geom_vline(xintercept = 0, color = "gray60", linetype = 2) + | |
geom_density(fill = "skyblue") + | |
scale_x_continuous(breaks = c(seq(-0.45,0.15,0.05))) + | |
geom_vline(xintercept = 0, color = "gray60", linetype = 2) + | |
theme_bw() + | |
labs(title="Distribution of Deviations from Centered PDCP", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Deviation from Centered PDCP", | |
y = "Density") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("dist_dev_from_centered_PDCP.png", width = 6, height = 5) | |
# distrobution of absolute centered PDCP for all species | |
ggplot(dat1, aes(x = abs(cent_spec_mean_pdcp))) + | |
geom_vline(xintercept = 0, color = "gray60", linetype = 2) + | |
geom_density(fill = "skyblue") + | |
scale_x_continuous(breaks = c(seq(0,0.5,0.05))) + | |
geom_vline(xintercept = 0, color = "gray60", linetype = 2) + | |
theme_bw() + | |
labs(title="Distribution of Absolute Deviations from Centered PDCP", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Deviation from Centered PDCP", | |
y = "Density") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("ABS_dist_dev_from_centered_PDCP.png", width = 6, height = 5) | |
### Fit beta parameters to data via MLE | |
# look at dist. being parametric about this | |
f3 <- fitdist(abs(dat1$cent_spec_mean_pdcp),"beta") | |
# plot boostrapped common distributions reatlive to data for comparison | |
descdist(abs(dat1$cent_spec_mean_pdcp), boot = 1001) | |
# save via quartz, may not work on Windows | |
quartz.save(file = "Cullen_Frey_graph.png", height = 5, width = 6, | |
type = "png", device = dev.cur(), dpi = 300) | |
# looking at MLE fit of beta to data | |
summary(f3) | |
gofstat(f3) # Goodness-of-Fit statistics | |
plot(f3) # CDF, QQ< PP, Density plot | |
# sample form beta distribution based on the MLE estimate of [f3] | |
b <- rbeta(10000,f3$estimate[1], f3$estimate[2]) | |
# plot empirical data versus samples from parameterized beta distribution | |
ggplot() + | |
geom_density(data = data.frame(beta_samp = b), | |
aes(x = beta_samp), fill = "orange") + | |
geom_density(data = dat1, aes(x = abs(cent_spec_mean_pdcp)), fill = "skyblue3", alpha = 0.55) + | |
theme_bw() + | |
scale_x_continuous(breaks = c(seq(0,0.5,0.05))) + | |
geom_vline(xintercept = 0, color = "gray60", linetype = 2) + | |
theme_bw() + | |
labs(title="Comparirson of Empirical and Simulated Beta Distribution", | |
subtitle="Orange = Beta(0.57, 15.68), Blue = empirical", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Absolute Deviation from Centered PDCP", | |
y = "Density") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold"), | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank() | |
) | |
ggsave("Beta_vs_empirical_dist_centered_PDCP.png", width = 6, height = 5) | |
###### Simulate plausible evolution paths for each observed Pokémon in sample | |
### Sequence to sample shape parameters from parametrized beta | |
### Apply these beta draws to each pokémon species through | |
### data generating model | |
draws_from_beta = 100 | |
shape1 = f3$estimate[1] | |
shape2 = f3$estimate[2] | |
# Simulate plausible evolution scenarios based on our model | |
# the melt data for plotting purposes | |
# sim_pidgey | |
sim_pidgey <- sim_CP_new(dat1, "Pidgey", shape1, shape2, draws_from_beta) | |
pidgey_dat <- filter(dat1, species == "Pidgey") | |
pidgey_melt <- prep_sim_data_for_plot(sim_pidgey, pidgey_dat) | |
# sim_eevee | |
sim_eevee <- sim_CP_new(dat1, "Eevee", shape1, shape2, draws_from_beta) | |
eevee_dat <- filter(dat1, species == "Eevee") | |
eevee_melt <- prep_sim_data_for_plot(sim_eevee, eevee_dat) | |
# sim_caterpie | |
sim_caterpie <- sim_CP_new(dat1, "Caterpie",shape1, shape2, draws_from_beta) | |
caterpie_dat <- filter(dat1, species == "Caterpie") | |
caterpie_melt <- prep_sim_data_for_plot(sim_caterpie, caterpie_dat) | |
# sim_weedle | |
sim_weedle <- sim_CP_new(dat1, "Weedle",shape1, shape2, draws_from_beta) | |
weedle_dat <- filter(dat1, species == "Weedle") | |
weedle_melt <- prep_sim_data_for_plot(sim_weedle, weedle_dat) | |
# combine all | |
sim_pokemon <- rbind(pidgey_melt, eevee_melt, caterpie_melt, weedle_melt) | |
## plot density of empirical post-evolution CP (red line) over | |
## density of models simulated evolution scenarios (blue lines) | |
## scenarios based on model and universal beta shape parameter estimates from [f3] | |
ggplot(data = sim_pokemon, aes(x = value,group = variable, color = model)) + | |
geom_line(stat="density", size=0.75, alpha=0.25) + | |
geom_line(data = filter(sim_pokemon, model == "data"), | |
aes(x = value), stat="density", color = "red", size = 0.25) + | |
guides(color = guide_legend(override.aes = list(alpha=1))) + | |
scale_color_manual(values = c("red", "skyblue")) + | |
facet_grid(species~., scales = "free") + | |
theme_bw() + | |
labs(title="Density of Simulated and Empirical Post-Evolution CP per Sepcies", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Post-Evolution CP", | |
y = "Density") + | |
theme( | |
strip.background = element_rect(colour = "white", fill = "white"), | |
strip.text.x = element_text(colour = "black", size = 10, face = "bold", | |
family = "Trebuchet MS"), | |
strip.text.y = element_text(colour = "black", size = 10, face = "bold", | |
family = "Trebuchet MS"), | |
panel.margin = unit(0, "lines"), | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
legend.position = "bottom", | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank() | |
) | |
ggsave("Species_simulated_CP_New.png", width = 8, height = 8) | |
# Check errors via RMSE of simulated evolution scenerios vs data | |
pidgey_sim_beta_rmse <- sim_beta_RMSE(pidgey_dat, pidgey_melt) | |
eevee_sim_beta_rmse <- sim_beta_RMSE(eevee_dat, eevee_melt) | |
caterpie_sim_beta_rmse <- sim_beta_RMSE(caterpie_dat, caterpie_melt) | |
weedle_sim_beta_rmse <- sim_beta_RMSE(weedle_dat, weedle_melt) | |
# combine together | |
species_sim_beta_metrics <- rbind(pidgey_sim_beta_rmse, eevee_sim_beta_rmse, | |
caterpie_sim_beta_rmse, weedle_sim_beta_rmse) | |
# plot jittered distribution of RMSE for the mean post-evolution CP | |
# for each observed pokémon by species | |
ggplot(species_sim_beta_metrics, aes(x = species, y = rmse, | |
group = species, color = species)) + | |
geom_jitter(width = 0.3) + | |
theme_bw() + | |
scale_color_colorblind() + | |
labs(title="RMSE Error for Mean Beta Sample Post-evolution CP ", | |
subtitle = "Mean of n = 100 beta samples for each species observation", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Species", | |
y = "RMSE") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
legend.position = "right" | |
) | |
ggsave("Species_beta_CP_New_mean_RMSE.png", width = 6, height = 3) | |
# table of RMSE results | |
sim_beta_rmse_table <- group_by(species_sim_beta_metrics, species) %>% | |
summarise(cp_new_mean = round(mean(cp_new),2), | |
sim_cp_new_mean = round(mean(means),2), | |
sd_mean = round(mean(sd),2), | |
rmse_mean = round(mean(rmse),2), | |
mae_mean = round(mean(mae),2)) %>% | |
mutate(pcnt_mae = round(mae_mean / cp_new_mean,3) * 100) %>% | |
data.frame() | |
# xtable for blog | |
print(xtable(sim_beta_rmse_table), type = "html", include.rownames=FALSE) | |
##### Look at pluasibe variation in beta shape parameters | |
### instead of one universal MLE estimate, this samples many plausible | |
### shape paramers via bootstrap, then estiamtes evolution scenerios for | |
### a range of models that are coherent with the given data | |
# Boostrap 1001 different pluasible parameters pairs for beta distribution | |
beta_boot <- bootdist(f3, niter = 1001) | |
# extract estimates | |
boot_est <- beta_boot$estim | |
# get qunatiles of boostrapped parameter estimates and put in table for blog | |
shp1_boot_est <- round(quantile(boot_est[,1], c(0.025, 0.25, 0.5, 0.75, 0.95)),2) | |
shp2_boot_est <- round(quantile(boot_est[,2], c(0.025, 0.25, 0.5, 0.75, 0.95)),2) | |
boot_est_m <- rbind(shp1_boot_est, shp2_boot_est) | |
print(xtable(boot_est_m), type = "html") | |
# plot distribution and density of plausible shape parameter pairs | |
ggplot(beta_boot$estim, aes(x = shape1, y = shape2)) + | |
geom_point(alpha = 0.95, color = "skyblue3") + | |
geom_density_2d(color = "gray50", alpha = 0.85) + | |
theme_bw() + | |
labs(title="Distribution of Shape Parameters for Beta Distribution", | |
subtitle="Boostraps, n = 1001", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Shape 1", | |
y = "Shape 2") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Boostrapped_shape_parameters.png", width = 6, height = 5) | |
# samples of 100 different pidgeys (e.g. PDCPs), under different parameters consistent with the data | |
# sim_pidgey with params | |
param_draws = 100 | |
beta_draws = 1 | |
param_dist <- beta_boot$estim | |
# Simulate beta_draws post-evolution scenerios from param_draws different | |
# beta distirbution parameterizations (e.g. other plausbile models that incorprate | |
# uncertainty in limited number ofobservations) | |
psim_pidgey <- replicate(param_draws, sim_CP_new_by_param(dat1, "Pidgey", param_dist, beta_draws)) | |
psim_eevee <- replicate(param_draws, sim_CP_new_by_param(dat1, "Eevee", param_dist, beta_draws)) | |
psim_weedle <- replicate(param_draws, sim_CP_new_by_param(dat1, "Weedle", param_dist, beta_draws)) | |
psim_caterpie <- replicate(param_draws, sim_CP_new_by_param(dat1, "Caterpie", param_dist, beta_draws)) | |
# prepare psim data for plotting | |
psim_pidgey_list <- prep_psim_for_plot(psim_pidgey, pidgey_dat) | |
psim_eevee_list <- prep_psim_for_plot(psim_eevee, eevee_dat) | |
psim_weedle_list <- prep_psim_for_plot(psim_weedle, weedle_dat) | |
psim_caterpie_list <- prep_psim_for_plot(psim_caterpie, caterpie_dat) | |
# Pidgey parameter Sim plot | |
ggplot() + | |
geom_point(data = psim_pidgey_list[[1]], aes(x = ord, y = value), | |
alpha = 0.50, color = "skyblue3") + | |
geom_point(data = psim_pidgey_list[[2]], aes(x = ord, y = cp_new), color = "red") + | |
theme_bw() + | |
labs(title="Range of Simulated Post Evolution CP - Pidgey", | |
subtitle="Simulated Beta Parameters, n = 100", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Pokémon Ordered by Mean Simulated cp_new", | |
y = "Post Evolution CP") + | |
theme( | |
axis.text.x = element_blank(), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Simulated_shape_parameters_pidgey.png", width = 6, height = 5) | |
# Eevee parameter Sim plot | |
ggplot() + | |
geom_point(data = psim_eevee_list[[1]], aes(x = ord, y = value), | |
alpha = 0.50, color = "skyblue3") + | |
geom_point(data = psim_eevee_list[[2]], aes(x = ord, y = cp_new), color = "red") + | |
theme_bw() + | |
labs(title="Range of Simulated Post Evolution CP = Eevee", | |
subtitle="Simulated Beta Parameters, n = 100", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Pokémon Ordered by Mean Simulated cp_new", | |
y = "Post Evolution CP") + | |
theme( | |
axis.text.x = element_blank(), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Simulated_shape_parameters_eevee.png", width = 6, height = 5) | |
# Weedle parameter Sim plot | |
ggplot() + | |
geom_point(data = psim_weedle_list[[1]], aes(x = ord, y = value), | |
alpha = 0.50, color = "skyblue3") + | |
geom_point(data = psim_weedle_list[[2]], aes(x = ord, y = cp_new), color = "red") + | |
theme_bw() + | |
labs(title="Range of Simulated Post Evolution CP = Weedle", | |
subtitle="Simulated Beta Parameters, n = 100", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Pokémon Ordered by Mean Simulated cp_new", | |
y = "Post Evolution CP") + | |
theme( | |
axis.text.x = element_blank(), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Simulated_shape_parameters_weedle.png", width = 6, height = 5) | |
# Caterpie parameter Sim plot | |
ggplot() + | |
geom_point(data = psim_caterpie_list[[1]], aes(x = ord, y = value), | |
alpha = 0.50, color = "skyblue3") + | |
geom_point(data = psim_caterpie_list[[2]], aes(x = ord, y = cp_new), color = "red") + | |
theme_bw() + | |
labs(title="Range of Simulated Post Evolution CP = Caterpie", | |
subtitle="Simulated Beta Parameters, n = 100", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Pokémon Ordered by Mean Simulated cp_new", | |
y = "Post Evolution CP") + | |
theme( | |
axis.text.x = element_blank(), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Simulated_shape_parameters_caterpie.png", width = 6, height = 5) | |
# RMSE results from simualted beta parameters | |
pidgey_sim_rmse <- sim_param_RMSE(pidgey_dat, psim_pidgey_list[[1]]) | |
eevee_sim_rmse <- sim_param_RMSE(eevee_dat, psim_eevee_list[[1]]) | |
caterpie_sim_rmse <- sim_param_RMSE(caterpie_dat, psim_caterpie_list[[1]]) | |
weedle_sim_rmse <- sim_param_RMSE(weedle_dat, psim_weedle_list[[1]]) | |
# combine RMSE | |
species_sim_metrics <- rbind(pidgey_sim_rmse, eevee_sim_rmse, | |
caterpie_sim_rmse, weedle_sim_rmse) | |
# plot RMSE of simulated models parameters | |
ggplot(species_sim_metrics, aes(x = species, y = rmse, | |
group = species, color = species)) + | |
geom_jitter(width = 0.3) + | |
theme_bw() + | |
scale_color_colorblind() + | |
labs(title="RMSE Error for Mean Simulated Post-evolution CP ", | |
subtitle = "Mean of n = 100 simulations for each species observation", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Species", | |
y = "RMSE") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
legend.position = "right" | |
) | |
ggsave("Species_simulated_CP_New_mean_RMSE.png", width = 6, height = 3) | |
# put metris into a table | |
sim_rmse_table <- group_by(species_sim_metrics, species) %>% | |
summarise(cp_new_mean = round(mean(cp_new),2), | |
sim_cp_new_mean = round(mean(means),2), | |
sd_mean = round(mean(sd),2), | |
rmse_mean = round(mean(rmse),2), | |
mae_mean = round(mean(mae),2)) %>% | |
mutate(pcnt_mae = round(mae_mean / cp_new_mean,3) * 100) %>% | |
data.frame() | |
# print table HTML for blog | |
print(xtable(sim_rmse_table), type = "html", include.rownames=FALSE) | |
#### Model parameters in Full Bayes??!!! Are you crazy?! | |
## estimation using JAGS | |
library("rjags") # if you dare!!! | |
# extract abs_cent_spec_mean_pdcp for jags estimation | |
y <- abs(dat1$cent_spec_mean_pdcp) | |
# build model text for JAGE | |
model_string <- "model{ | |
for(i in 1:length(y)) { | |
y[i] ~ dbeta(alpha, beta) # y distributed as a beta | |
} | |
alpha ~ dt(0,1,1)T(0,) # half cauchy prior | |
beta ~ dt(0,1,1)T(0,) # half cauchy prior | |
}" | |
# initialize model with data | |
model <- jags.model(textConnection(model_string), data = list(y = y), n.chains = 3, n.adapt= 10000) | |
update(model, 10000); # Burnin for 10000 samples | |
# sample alpha (shape1) and beta (shape2) from posterior. Can take a while | |
mcmc_samples <- coda.samples(model, variable.names=c("alpha", "beta"), n.iter=20000) | |
# plot(mcmc_samples) # plot chain convergence and parameter density | |
# model parameter distribution stats | |
summary(mcmc_samples) | |
# table of stats and print HTML for blog | |
jags_sum <- summary(mcmc_samples)$quantiles | |
print(xtable(jags_sum), type = "html") | |
# plot shape1 and shape 2 parameters | |
alpha <- mcmc_samples[[1]][,1] | |
beta <- mcmc_samples[[1]][,2] | |
# plot correlated parameters with MLE from earlier param estimation and MAP from posterior | |
ggplot(data = data.frame(alpha = as.numeric(alpha), beta = as.numeric(beta)), | |
aes(x = alpha, y = beta)) + | |
geom_point(alpha = 0.15, color = "skyblue3") + | |
geom_density_2d(color = "gray50", alpha = 0.85) + | |
geom_point(aes(x = 0.57, y = 15.68), color = "yellow", size = 2) + # MLE | |
geom_point(aes(x = mean(alpha), y = mean(beta)), color = "red", size = 2) + #MAP | |
theme_bw() + | |
labs(title="Posterior of Beta Distribution Shape Parameters", | |
subtitle="n = 20K samples, red = MAP, yellow = MLE", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Shape 1", | |
y = "Shape 2") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Bayes_simulated_shape_parameters.png", width = 6, height = 5) | |
# prior, post, plot | |
post_plot <- data.frame(value = c(alpha, beta), | |
parameter = rep(c("alpha", "beta"), each = length(alpha)), | |
model = "posterior") | |
alpha_half_cauchy <- rt(nrow(post_plot),1,1) | |
alpha_half_cauchy <- alpha_half_cauchy[alpha_half_cauchy > 0] | |
beta_half_cauchy <- rt(nrow(post_plot),1,1) | |
beta_half_cauchy <- beta_half_cauchy[beta_half_cauchy > 0] | |
prior_plot <- data.frame(value = c(alpha_half_cauchy,beta_half_cauchy), | |
parameter = rep(c("alpha", "beta"), | |
times = c(length(alpha_half_cauchy), length(beta_half_cauchy))), | |
model = "prior" | |
) | |
jags_plot_dat <- rbind(post_plot, prior_plot) | |
ggplot(data = filter(jags_plot_dat, parameter == "alpha"), | |
aes(x = value, group = model, color = model)) + | |
geom_density() + | |
scale_x_continuous(limits = c(0,10), breaks = seq(0,10,0.5)) + | |
scale_color_manual(values = c("orange", "purple")) + | |
theme_bw() + | |
labs(title="Prior and Posterior of Shape1 Parameter of Beta Distribution", | |
subtitle="Half Cauchy as T distribution (1,1) > 0", | |
caption="20K Samples from JAGS model", | |
x = "Shape1 Parameter", | |
y = "Density") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("alpha_plot.png", width = 6, height = 5) | |
ggplot(data = filter(jags_plot_dat, parameter == "beta"), | |
aes(x = value, group = model, color = model)) + | |
geom_density() + | |
scale_x_continuous(limits = c(0,100), breaks = seq(0,100,5)) + | |
scale_color_manual(values = c("orange", "purple")) + | |
theme_bw() + | |
labs(title="Prior and Posterior of Shape2 Parameter of Beta Distribution", | |
subtitle="Half Cauchy as T distribution (1,1) > 0", | |
caption="20K Samples from JAGS model", | |
x = "Shape2 Parameter", | |
y = "Density") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("beta_plot.png", width = 6, height = 5) | |
###### Generalized predictions | |
base = 10 | |
sim_reps = 10000 | |
length_out = 10 | |
# don't need all of this for here, but good summary for all species | |
spec_ranges <- group_by(dat1, species) %>% | |
summarise(min_cp = min(cp), | |
max_cp = max(cp), | |
mean_cp = mean(cp), | |
mean_cp_new = mean(cp_new), | |
mean_pdcp = mean(pcnt_d_cp), | |
mean_abs_cent_spec_mean_pdcp = mean(abs(cent_spec_mean_pdcp))) %>% | |
data.frame() | |
print(xtable(spec_ranges), type = "html", include.rownames=FALSE) | |
## Predict siualted Pokémon species based on empirical range | |
# Pidgey | |
pidgey_cp_range <- sim_new_species(spec_ranges, pidgey_dat) | |
predicted_pidgey <- pedict_simulated_cp_new(pidgey_cp_range, f3$estimate[1], f3$estimate[2], reps = sim_reps) | |
pidgey_cp_new_ranges <- make_pred_cp_plot_dat(predicted_pidgey, pidgey_cp_range) | |
# Eevee | |
eevee_cp_range <- sim_new_species(spec_ranges, eevee_dat) | |
predicted_eevee <- pedict_simulated_cp_new(eevee_cp_range, f3$estimate[1], f3$estimate[2], reps = sim_reps) | |
eevee_cp_new_ranges <- make_pred_cp_plot_dat(predicted_eevee, eevee_cp_range) | |
# Weedle | |
weedle_cp_range <- sim_new_species(spec_ranges, weedle_dat) | |
predicted_weedle <- pedict_simulated_cp_new(weedle_cp_range, f3$estimate[1], f3$estimate[2], reps = sim_reps) | |
weedle_cp_new_ranges <- make_pred_cp_plot_dat(predicted_weedle, weedle_cp_range) | |
# Caterpie | |
caterpie_cp_range <- sim_new_species(spec_ranges, caterpie_dat) | |
predicted_caterpie <- pedict_simulated_cp_new(caterpie_cp_range, f3$estimate[1], f3$estimate[2], reps = sim_reps) | |
caterpie_cp_new_ranges <- make_pred_cp_plot_dat(predicted_caterpie, caterpie_cp_range) | |
# line plot with error ribbons, ok, but not too flashy | |
ggplot(data = pidgey_cp_new_ranges, aes(x = cp, y = cp_new_mean)) + | |
geom_ribbon(aes(ymin = cp_new_min, ymax = cp_new_max),alpha=0.4, fill = "skyblue") + | |
geom_ribbon(aes(ymin = cp_new_5, ymax = cp_new_95),alpha=0.75, fill = "skyblue3") + | |
geom_point(data = pidgey_dat, aes(x = cp, y = cp_new), color = "gray30", alpha = 0.60) + | |
geom_line(color = "firebrick", size = 0.75) + | |
scale_x_continuous(breaks = seq(min(pidgey_cp_new_ranges$cp), | |
max(pidgey_cp_new_ranges$cp),by = 20)) + | |
scale_y_continuous(breaks = seq(min(pidgey_cp_new_ranges$cp_new_min), | |
max(pidgey_cp_new_ranges$cp_new_max),by = 50)) + | |
theme_bw() + | |
labs(title="Range of Model Predictions for CP_new - Pidgey", | |
subtitle="Mean, 95% prediction interval & min/max prediction", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Pre-Evolution CP", | |
y = "Modeled Post-Evolution CP") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Predicted_cp_new_pidgey.png", width = 6, height = 5) | |
# eevee plot | |
ggplot(data = eevee_cp_new_ranges, aes(x = cp, y = cp_new_mean)) + | |
geom_ribbon(aes(ymin = cp_new_min, ymax = cp_new_max),alpha=0.4, fill = "skyblue") + | |
geom_ribbon(aes(ymin = cp_new_5, ymax = cp_new_95),alpha=0.75, fill = "skyblue3") + | |
geom_point(data = eevee_dat, aes(x = cp, y = cp_new), color = "gray30", alpha = 0.50) + | |
geom_line(color = "firebrick", size = 0.75) + | |
scale_x_continuous(breaks = seq(min(eevee_cp_new_ranges$cp), | |
max(eevee_cp_new_ranges$cp),by = 20)) + | |
scale_y_continuous(breaks = seq(min(eevee_cp_new_ranges$cp_new_min), | |
max(eevee_cp_new_ranges$cp_new_max),by = 50)) + | |
theme_bw() + | |
labs(title="Range of Model Predictions for CP_new - Eevee", | |
subtitle="Mean, 95% prediction interval & min/max prediction", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Pre-Evolution CP", | |
y = "Modeled Post-Evolution CP") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Predicted_cp_new_eevee.png", width = 6, height = 5) | |
# weedle plot | |
ggplot(data = weedle_cp_new_ranges, aes(x = cp, y = cp_new_mean)) + | |
geom_ribbon(aes(ymin = cp_new_min, ymax = cp_new_max),alpha=0.4, fill = "skyblue") + | |
geom_ribbon(aes(ymin = cp_new_5, ymax = cp_new_95),alpha=0.75, fill = "skyblue3") + | |
geom_point(data = weedle_dat, aes(x = cp, y = cp_new), color = "gray30", alpha = 0.50) + | |
geom_line(color = "firebrick", size = 0.75) + | |
scale_x_continuous(breaks = seq(min(weedle_cp_new_ranges$cp), | |
max(weedle_cp_new_ranges$cp),by = 20)) + | |
scale_y_continuous(breaks = seq(min(weedle_cp_new_ranges$cp_new_min), | |
max(weedle_cp_new_ranges$cp_new_max),by = 50)) + | |
theme_bw() + | |
labs(title="Range of Model Predictions for CP_new - weedle", | |
subtitle="Mean, 95% prediction interval & min/max prediction", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Pre-Evolution CP", | |
y = "Modeled Post-Evolution CP") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Predicted_cp_new_weedle.png", width = 6, height = 5) | |
# caterpie plot | |
ggplot(data = caterpie_cp_new_ranges, aes(x = cp, y = cp_new_mean)) + | |
geom_ribbon(aes(ymin = cp_new_min, ymax = cp_new_max),alpha=0.4, fill = "skyblue") + | |
geom_ribbon(aes(ymin = cp_new_5, ymax = cp_new_95),alpha=0.75, fill = "skyblue3") + | |
geom_point(data = caterpie_dat, aes(x = cp, y = cp_new), color = "gray30", alpha = 0.50) + | |
geom_line(color = "firebrick", size = 0.75) + | |
scale_x_continuous(breaks = seq(min(caterpie_cp_new_ranges$cp), | |
max(caterpie_cp_new_ranges$cp),by = 20)) + | |
scale_y_continuous(breaks = seq(min(caterpie_cp_new_ranges$cp_new_min), | |
max(caterpie_cp_new_ranges$cp_new_max),by = 50)) + | |
theme_bw() + | |
labs(title="Range of Model Predictions for CP_new - Caterpie", | |
subtitle="Mean, 95% prediction interval & min/max prediction", | |
caption="Data: https://www.openintro.org/stat/data/?data=pokemon", | |
x = "Pre-Evolution CP", | |
y = "Modeled Post-Evolution CP") + | |
theme( | |
axis.text.x = element_text(size = 8, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 8, family = "Trebuchet MS"), | |
axis.title = element_text(size = 10, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 7, hjust=0, margin=margin(t=5), | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Bold") | |
) | |
ggsave("Predicted_cp_new_caterpie.png", width = 6, height = 5) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment