Created
October 13, 2015 02:18
-
-
Save mrecos/fc4013ec65e27581436c to your computer and use it in GitHub Desktop.
Attempt at model plot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The original plot in question appears to be the results of a model | |
# based on scores and age, a distribution is predicted for score on | |
# each age class. The range of predicted scores is visualized as a | |
# normal'ish density over each age class. There is also a point which is the | |
# mean observation of the raw score at each age class. This does not need to | |
# be at the mean of the distribution depending on fit of the model. | |
# There is also a line that is the predicted response of model over age classes. | |
# | |
# I did not have the data or model from this example, so I improvised. | |
# I first created sythetic model data based on the example, and from the | |
# model prediction, created a response by adding some noise. Backwards, but | |
# fine for here I suppose. There are plenty of things that would need some | |
# adjusting with real data, but it was a fun excericse to see if this could work. | |
# | |
# Clearly, the biggest difference here is the densities displayed as violins | |
# and not the one-sided density. I messed around a bit with no luck, but maybe | |
# someone could show me how to fix that. | |
# create synthetic modelled data | |
score <- c(18,22,26,30,32,34,36,38,40,41,42) | |
age <- seq(6.5, 16.5, length.out = length(score)) | |
dat <- NULL | |
for(i in seq_along(score)){ | |
score_sampl <- rnorm(10e3,score[i],2) | |
obs <- data.frame(score = score_sampl, age = age[i]) | |
dat <- rbind(dat, obs) | |
} | |
# derive the response by adding noise to model output | |
obs_score_mean <- jitter(score, amount = 2) | |
obs_score_mean <- data.frame(score_mean = obs_score_mean, age = age) | |
prediction <- data.frame(pred = score, age = age) | |
# the plot uses data from a handful of sources to show the layers | |
# the use of a single source versus multiple sources is up for debate, | |
# but with something like this, it is hard to fit all layers into a single DF. | |
# well, for me it is anyway | |
ggplot(data = dat, aes(x = age, y = score, group = age)) + | |
# this stat allows for the general config of the plot | |
stat_ydensity(geom = "violin", color = "red") + | |
# the summary make work to show the means depending on data cofiguration | |
# stat_summary(fun.y = "median", geom = "point") | |
# i used geom_point and brought in the observation data | |
geom_point(data = obs_score_mean, aes(x = age, y = score_mean), | |
size = 4) + | |
# geom_smooth to draw the predictive fit line | |
geom_smooth(data = prediction, aes(x = age, y = pred, group = 1), | |
size = 1, color = "black", se = FALSE, method = "loess") + | |
# theme stuff to change look | |
theme_bw() + | |
theme( | |
panel.grid = element_blank(), | |
axis.text = element_text(face = "bold", size = rel(1)), | |
axis.title.x = element_text(face = "bold", size = rel(1.1), vjust = -0.5), | |
axis.title.y = element_text(face = "bold", size = rel(1.1), vjust = 1.25) | |
) + | |
# adjust axis attributes | |
scale_x_continuous(limits=c(6.5, 16.5), breaks = seq(6.5,16.5,1), | |
name = "Age in years") + | |
scale_y_continuous(limits=c(0, 50), breaks = seq(0,50,5), | |
name = "raw score") | |
# save (on mac) | |
quartz.save(file = "example_density.png", height = 5, width = 8, | |
type = "png", device = dev.cur(), dpi = 200) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment