Skip to content

Instantly share code, notes, and snippets.

@vanatteveldt
Last active November 6, 2022 09:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vanatteveldt/a1fdf231083b44eca8f3479f91222ffd to your computer and use it in GitHub Desktop.
Save vanatteveldt/a1fdf231083b44eca8f3479f91222ffd to your computer and use it in GitHub Desktop.
# Plot histogram and normal curve for (simulated) data
library(tidyverse)
library(moments)
library(glue)
# Plot a bar histogram of `x` with a fitted normal curve superimposed.
#
# Values are binned by rounding to the nearest multiple of `binwidth` and
# drawn as bars of counts. A normal density with the sample mean and standard
# deviation is drawn on the same (count) scale by multiplying the density by
# `n * binwidth`; the secondary y axis divides by the same factor so it reads
# as density. Mean, sd, skewness and kurtosis are shown in the subtitle.
#
# Arguments:
#   x         Numeric vector. Non-finite values (NA, NaN, Inf, -Inf) are
#             dropped before any statistic is computed.
#   binwidth  Single positive number giving the bin width. The default of 1
#             bins by rounding to whole numbers.
#
# Returns a ggplot object. Stops with an error if `x` is not numeric, has
# fewer than two finite values, or if `binwidth` is not a positive number.
plot_distribution <- function(x, binwidth = 1) {
  if (!is.numeric(x)) {
    stop("`x` must be a numeric vector.", call. = FALSE)
  }
  if (!is.numeric(binwidth) || length(binwidth) != 1L ||
        !is.finite(binwidth) || binwidth <= 0) {
    stop("`binwidth` must be a single positive number.", call. = FALSE)
  }

  # A single NA would turn every statistic into NA and hide the curve, so
  # only finite observations are used, and they define n for the scaling.
  x <- x[is.finite(x)]
  n <- length(x)
  if (n < 2L) {
    stop("`x` must contain at least two finite values.", call. = FALSE)
  }

  # Locals deliberately do not reuse the names sd/skewness/kurtosis, so the
  # functions of the same name are not shadowed.
  x_mean <- mean(x)
  x_sd <- sd(x)
  x_skew <- moments::skewness(x)
  x_kurt <- moments::kurtosis(x)

  # Expected count in a bin of width w is roughly n * w * density.
  count_scale <- n * binwidth

  tibble(x = round(x / binwidth) * binwidth) |>
    ggplot() +
    geom_bar(aes(x), fill = "darkgrey") +
    stat_function(
      fun = function(v) dnorm(v, mean = x_mean, sd = x_sd) * count_scale,
      n = 100
    ) +
    scale_y_continuous(
      "Frequency of simulated data",
      sec.axis = sec_axis(
        ~ . / count_scale,
        name = "Density for normal distribution"
      )
    ) +
    labs(
      title = "Histogram of (simulated) data with estimated normal curve superimposed",
      subtitle = glue(
        "Mean: {round(x_mean, 2)}, sd: {round(x_sd, 2)}, ",
        "skewness: {round(x_skew, 2)}, kurtosis: {round(x_kurt, 2)}"
      )
    )
}
# Demonstrations: each section simulates a sample with a particular shape and
# plots it against the normal curve fitted to that sample.

# Normal distribution
x <- rnorm(5000, 0, 2)
plot_distribution(x)

# Skew right by moving some points over: about a quarter of the draws are
# selected at random, and those that are negative are mirrored to the
# positive side.
x <- rnorm(5000, 0, 2)
flip <- runif(length(x)) > .75
x <- ifelse(flip & x < 0, -x, x)
# Plot before x is overwritten by the next example.
plot_distribution(x)

# Leptokurtic by adding an extra weight near the peak
x <- c(
  rnorm(5000, 0, 4),
  rnorm(2000, 0, 1)
)
plot_distribution(x)

# Platykurtic by adding extra weight in the middle regions
x <- c(
  rnorm(5000, 0, 4),
  rnorm(1000, 4, 2.5),
  rnorm(1000, -4, 2.5)
)
plot_distribution(x)

# Leptokurtic and skewed by adding an extra weight near the peak on the side
x <- c(
  rnorm(5000, 0, 4),
  rnorm(2000, 1, 2)
)
plot_distribution(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment