Created
February 16, 2016 16:52
-
-
Save cyrilobolonsky/c7b4473c05b430f53688 to your computer and use it in GitHub Desktop.
R Tutorials | Probability Distributions in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Probability Distributions in R

# BINOMIAL DISTRIBUTION
# Open the help page for the binomial distribution functions.
?dbinom
# Every possible number of successes in 30 trials.
x <- 0:30
# Binomial density (probability mass function): 30 trials, success
# probability 0.5. type = "h" draws one vertical bar per value of x.
plot(x, dbinom(x, size = 30, prob = 0.5), type = "h")
# Binomial cumulative distribution function for the same parameters.
plot(x, pbinom(x, size = 30, prob = 0.5), type = "h")
# NORMAL DISTRIBUTION
# Standard normal (z) distribution: mean 0, standard deviation 1.
# Build the x-axis grid for the normal distribution: from -4 to 4 in steps
# of 0.01.
x <- seq(-4, 4, by = 0.01)
# Plot the normal density.
# The argument type = "l" draws a line, xlim = sets the limits of the x-axis,
# and main = sets the title of the plot.
plot(x, dnorm(x), type = "l", xlim = c(-4, 4), main = "Bell Curve")
# Alternatively, let curve() evaluate the expression over the range itself.
curve(dnorm(x), from = -4, to = 4, xlim = c(-4, 4), main = "Bell Curve")
# Plot the cumulative distribution function.
plot(x, pnorm(x), type = "l")
# The quantile function answers: which z value sits at the 25th percentile?
qnorm(0.25)
# It can also give the interval of z values that contains the central 95% of
# the distribution, which is useful for calculating confidence intervals.
qnorm(c(0.025, 0.975))
# Example: Blood Pressure
# Build the x-axis grid of blood pressure values: 60 to 120 mmHg in steps
# of 1.
bt <- seq(60, 120, by = 1)
# Plot the normal density with a mean of 90 mmHg and a standard deviation of
# 10 mmHg.
plot(
  bt, dnorm(bt, mean = 90, sd = 10),
  type = "l", xlim = c(60, 120), main = "Blood Pressure"
)
# Cumulative distribution function - pnorm().
# Evaluate it on the blood pressure grid with the same mean and sd, so the
# curve describes blood pressure rather than the standard normal.
plot(bt, pnorm(bt, mean = 90, sd = 10), type = "l")
# pnorm() gives, e.g., the proportion of people whose blood pressure is
# 80 mmHg or BELOW, given a population mean of 90 and a standard deviation
# of 10.
pnorm(80, mean = 90, sd = 10)
# Alternatively, pass mean and sd by position.
pnorm(80, 90, 10)
# We can round the answer to the closest integer (a probability of about
# 0.16 rounds to 0, so this is mostly an illustration of round()).
round(pnorm(80, 90, 10))
# Or we can round it to two digits after the decimal point.
round(pnorm(80, 90, 10), 2)
# Statistical Tests in the Normal Distribution
# Use the normal distribution of blood pressure (mean 90 mmHg, sd 10 mmHg)
# to illustrate one- and two-tailed z-tests.

# Draw the blood pressure density first: abline(), polygon() and text() all
# add to the current plot, so the bell curve must be the active figure.
bt <- seq(60, 120, by = 1)
plot(
  bt, dnorm(bt, mean = 90, sd = 10),
  type = "l", xlim = c(60, 120), main = "Blood Pressure"
)

# 1) One-tailed test
# First, compute the probability of randomly selecting a subject with a blood
# pressure of 72 mmHg or lower: the p-value, equal to the area of the polygon
# drawn below.
p_lower <- pnorm(72, mean = 90, sd = 10)
p_lower
# Second, draw a vertical line at 72 (v is the x-value).
abline(v = 72)
# Third, build the polygon coordinates for the region at or below 72 mmHg.
# The leading and trailing zeros close the shape along the x-axis.
cord.x <- c(60, seq(60, 72, 1), 72)
cord.y <- c(0, dnorm(seq(60, 72, 1), 90, 10), 0)
?polygon
polygon(cord.x, cord.y, col = "skyblue")
text(70, 0.005, paste("blue area = p =", round(p_lower, 4)))

# 2) Two-tailed test
# To test for a deviation from the mean in either direction that is at least
# as large as the one observed (72 mmHg is 18 below the mean), shade the
# mirrored region at or above 108 mmHg as well.
p_upper <- pnorm(108, mean = 90, sd = 10, lower.tail = FALSE)
cord.x1 <- c(108, seq(108, 120, 1), 120)
cord.y1 <- c(0, dnorm(seq(108, 120, 1), 90, 10), 0)
# Start a fresh figure so the per-tail labels do not overprint the one-tailed
# label, then shade both tails.
plot(
  bt, dnorm(bt, mean = 90, sd = 10),
  type = "l", xlim = c(60, 120), main = "Blood Pressure"
)
polygon(cord.x, cord.y, col = "skyblue")
polygon(cord.x1, cord.y1, col = "skyblue")
# Label each tail with its area and the plot with the two-tailed p-value.
text(65, 0.005, round(p_lower, 3))
text(115, 0.005, round(p_upper, 3))
text(75, 0.02, paste0(" p = ", round(p_lower + p_upper, 3), " "))
# OTHER DISTRIBUTIONS
# Each line opens the help page for that distribution's density function.
# Student's t distribution
?dt
# Poisson distribution
?dpois
# Chi-squared distribution
?dchisq
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment