Last active
November 1, 2017 09:05
-
-
Save agwells/78de2aa5d151434ae751fec93cd596ac to your computer and use it in GitHub Desktop.
Generating a skewed bell curve of random data in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Range of values to generate, and the value the distribution is
# centred on (the sampling weights computed below peak at medVal).
minVal <- 0
maxVal <- 100
medVal <- 70
# How many random values to draw.
numRows <- 1000000
# medVal must lie strictly inside the range: the skew exponent derived
# from these values is 0 or infinite when medVal equals an endpoint.
stopifnot(minVal < medVal, medVal < maxVal)
# Shift the range so that it starts at zero.
adjMax <- maxVal - minVal
adjMed <- medVal - minVal
# Generate a vector that sets how likely
# each value is to be chosen.
# An exponent that maps the relative position of
# our desired median (adjMed / adjMax) onto 0.5,
# i.e. (adjMed / adjMax)^skew == 0.5
skew <- log(0.5, base = adjMed / adjMax)
# For each possible value in the interval,
# we look up the corresponding density
# in a normal distribution over 0 to 1,
# after warping the 0-to-1 grid by that skew exponent
# so that the density peaks at the desired median.
skew_prob <- dnorm(
  seq(0, 1, length.out = adjMax + 1)^skew,
  mean = 0.5,
  # A stddev of one third of the mean puts 99.7% of the
  # normal curve (mean +/- 3 sd) between 0 and 1.
  sd = 0.5 / 3
)
# Now pick a number from minVal to maxVal,
# numRows times, with replacement. The chance of
# picking each value is weighted by the
# skew_prob vector calculated earlier (sample()
# rescales the weights, so they need not sum to 1).
x <- sample(
  minVal:maxVal,
  size = numRows,
  replace = TRUE,
  prob = skew_prob
)
# Cumulative proportions at which each stanine band (1 to 9) begins.
stanine <- c(0, 4, 11, 23, 40, 60, 77, 89, 96) / 100
# Score at the lower edge of each band. type = 1 makes every cut
# point a value that actually occurs in x.
q <- quantile(x, stanine, type = 1, na.rm = TRUE)
# A score's stanine is the number of cut points at or below it.
# q is non-decreasing, so findInterval() returns exactly that count
# for the whole vector in one vectorised call, with no per-element
# closure call.
s <- as.character(findInterval(x, q))
d <- data.frame(score = x, sta = s)
library(ggplot2)
# Histogram of the generated scores, one bin per unit of score,
# with each bar filled according to its stanine band.
ggplot(d) +
  geom_histogram(aes(score, fill = sta), binwidth = 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment