mrecos/density_plot.r

## density_plot.r
# The original plot in question appears to be the results of a model
# based on scores and age, a distribution is predicted for score on
# each age class.  The range of predicted scores is visualized as a
# normal'ish density over each age class.  There is also a point which is the
# mean observation of the raw score at each age class.  This does not need to
# be at the mean of the distribution depending on fit of the model.
# There is also a line that is the predicted response of model over age classes.
#
# I did not have the data or model from this example, so I improvised.
# I first created sythetic model data based on the example, and from the
# model prediction, created a response by adding some noise.  Backwards, but
# fine for here I suppose. There are plenty of things that would need some
# adjusting with real data, but it was a fun excericse to see if this could work.
#
# Clearly, the biggest difference here is the densities displayed as violins
# and not the one-sided density.  I messed around a bit with no luck, but maybe
# someone could show me how to fix that.

# create synthetic modelled data
score <- c(18,22,26,30,32,34,36,38,40,41,42)
age <- seq(6.5, 16.5, length.out = length(score))
dat <- NULL
for(i in seq_along(score)){
  score_sampl <- rnorm(10e3,score[i],2)
  obs <- data.frame(score = score_sampl, age = age[i])
  dat <- rbind(dat, obs)
}
# derive the response by adding noise to model output
obs_score_mean <- jitter(score, amount = 2)
obs_score_mean <- data.frame(score_mean = obs_score_mean, age = age)
prediction <- data.frame(pred = score, age = age)

# the plot uses data from a handful of sources to show the layers
# the use of a single source versus multiple sources is up for debate,
# but with something like this, it is hard to fit all layers into a single DF.
# well, for me it is anyway

ggplot(data = dat, aes(x = age, y = score, group = age)) +
  # this stat allows for the general config of the plot
  stat_ydensity(geom = "violin", color = "red") +
  # the summary make work to show the means depending on data cofiguration
  # stat_summary(fun.y = "median", geom = "point")
  # i used geom_point and brought in the observation data
  geom_point(data = obs_score_mean, aes(x = age, y = score_mean),
             size = 4) +
  # geom_smooth to draw the predictive fit line
  geom_smooth(data = prediction, aes(x = age, y = pred, group = 1),
              size = 1, color = "black", se = FALSE, method = "loess") +
  # theme stuff to change look
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.text = element_text(face = "bold", size = rel(1)),
    axis.title.x = element_text(face = "bold", size = rel(1.1), vjust = -0.5),
    axis.title.y = element_text(face = "bold", size = rel(1.1), vjust = 1.25)
  ) +
  # adjust axis attributes
  scale_x_continuous(limits=c(6.5, 16.5), breaks = seq(6.5,16.5,1),
                     name = "Age in years") +
  scale_y_continuous(limits=c(0, 50), breaks = seq(0,50,5),
                     name = "raw score")

# save (on mac)
quartz.save(file = "example_density.png", height = 5, width = 8,
            type = "png", device = dev.cur(), dpi = 200)
	# The original plot in question appears to be the results of a model
	# based on scores and age, a distribution is predicted for score on
	# each age class. The range of predicted scores is visualized as a
	# normal'ish density over each age class. There is also a point which is the
	# mean observation of the raw score at each age class. This does not need to
	# be at the mean of the distribution depending on fit of the model.
	# There is also a line that is the predicted response of model over age classes.
	#
	# I did not have the data or model from this example, so I improvised.
	# I first created sythetic model data based on the example, and from the
	# model prediction, created a response by adding some noise. Backwards, but
	# fine for here I suppose. There are plenty of things that would need some
	# adjusting with real data, but it was a fun excericse to see if this could work.
	#
	# Clearly, the biggest difference here is the densities displayed as violins
	# and not the one-sided density. I messed around a bit with no luck, but maybe
	# someone could show me how to fix that.

	# create synthetic modelled data
	score <- c(18,22,26,30,32,34,36,38,40,41,42)
	age <- seq(6.5, 16.5, length.out = length(score))
	dat <- NULL
	for(i in seq_along(score)){
	score_sampl <- rnorm(10e3,score[i],2)
	obs <- data.frame(score = score_sampl, age = age[i])
	dat <- rbind(dat, obs)
	}
	# derive the response by adding noise to model output
	obs_score_mean <- jitter(score, amount = 2)
	obs_score_mean <- data.frame(score_mean = obs_score_mean, age = age)
	prediction <- data.frame(pred = score, age = age)

	# the plot uses data from a handful of sources to show the layers
	# the use of a single source versus multiple sources is up for debate,
	# but with something like this, it is hard to fit all layers into a single DF.
	# well, for me it is anyway

	ggplot(data = dat, aes(x = age, y = score, group = age)) +
	# this stat allows for the general config of the plot
	stat_ydensity(geom = "violin", color = "red") +
	# the summary make work to show the means depending on data cofiguration
	# stat_summary(fun.y = "median", geom = "point")
	# i used geom_point and brought in the observation data
	geom_point(data = obs_score_mean, aes(x = age, y = score_mean),
	size = 4) +
	# geom_smooth to draw the predictive fit line
	geom_smooth(data = prediction, aes(x = age, y = pred, group = 1),
	size = 1, color = "black", se = FALSE, method = "loess") +
	# theme stuff to change look
	theme_bw() +
	theme(
	panel.grid = element_blank(),
	axis.text = element_text(face = "bold", size = rel(1)),
	axis.title.x = element_text(face = "bold", size = rel(1.1), vjust = -0.5),
	axis.title.y = element_text(face = "bold", size = rel(1.1), vjust = 1.25)
	) +
	# adjust axis attributes
	scale_x_continuous(limits=c(6.5, 16.5), breaks = seq(6.5,16.5,1),
	name = "Age in years") +
	scale_y_continuous(limits=c(0, 50), breaks = seq(0,50,5),
	name = "raw score")

	# save (on mac)
	quartz.save(file = "example_density.png", height = 5, width = 8,
	type = "png", device = dev.cur(), dpi = 200)