# cyrilobolonsky/gist:c7b4473c05b430f53688

## gistfile1.txt
#Probability Distributions in R


# BINOMIAL DISTRIBUTION
# Show the help page for the binomial density function.
help("dbinom")
# Possible success counts for 30 trials: the integers 0 through 30.
x <- seq.int(from = 0L, to = 30L)
# Binomial probability mass function (30 trials, success probability 0.5),
# drawn as vertical bars (type = "h").
plot(x = x, y = dbinom(x, 30, 0.5), type = "h")
# Binomial cumulative distribution function for the same parameters.
plot(x = x, y = pbinom(x, 30, 0.5), type = "h")


# NORMAL DISTRIBUTION
# Standard normal (z) distribution.
# Grid for the x-axis: -4 to 4 in steps of 0.01.
x <- seq(from = -4, to = 4, by = 0.01)
# Density curve: type = "l" joins the points with a line, xlim fixes the
# horizontal range that is shown, and main sets the plot title.
plot(x = x, y = dnorm(x), main = "Bell Curve", xlim = c(-4, 4), type = "l")
# The same picture without a precomputed grid: curve() evaluates the
# expression itself between `from` and `to`.
curve(expr = dnorm(x), from = -4, to = 4, main = "Bell Curve", xlim = c(-4, 4))
# Cumulative distribution function.
plot(x = x, y = pnorm(x), type = "l")
# Quantile function: the z value at the 25th percentile.
qnorm(p = 0.25)
# Bounds of the central interval that holds 95% of the distribution
# (useful when calculating confidence intervals).
qnorm(p = c(0.025, 0.975))

# Example: Blood Pressure
# Grid for the x-axis: blood pressure from 60 to 120 mmHg in steps of 1.
bt <- seq(60, 120, 1)
# Density of a normal distribution with mean 90 mmHg and standard deviation
# 10 mmHg.
plot(bt, dnorm(bt, 90, 10), type = "l", xlim = c(60, 120), main = "Blood Pressure")
# Cumulative distribution function - pnorm().
# Plotted on the blood-pressure grid with the same mean and standard deviation
# as the density above (not on the standard-normal grid `x`).
plot(bt, pnorm(bt, 90, 10), type = "l")
# pnorm() gives the proportion of people whose blood pressure is 80 mmHg or
# BELOW, given a population mean of 90 and a standard deviation of 10.
pnorm(q = 80, mean = 90, sd = 10)
# Alternatively, with positional arguments (q, mean, sd).
pnorm(80, 90, 10)
# Rounding to the closest integer (a probability below 0.5 becomes 0).
round(pnorm(80, 90, 10))
# Rounding to two decimal places.
round(pnorm(80, 90, 10), 2)

# Statistical Tests in Normal Distribution
# Use the normal distribution of blood pressure (mean 90 mmHg, sd 10 mmHg)
# to illustrate one- and two-tailed z-tests.
# abline(), polygon() and text() draw on the current plot, so the density
# curve is drawn again here; otherwise the annotations would land on
# whichever plot happened to be drawn last.
bt <- seq(60, 120, 1)
plot(bt, dnorm(bt, 90, 10), type = "l", xlim = c(60, 120), main = "Blood Pressure")
# 1) One-tailed test
# First, the probability of randomly selecting a subject with a blood pressure
# of 72 mmHg or lower: the p-value, equal to the area of the polygon drawn
# below.
p_one_tailed <- pnorm(72, 90, 10)
p_one_tailed
# Second, a vertical line at 72 (v is the x-value).
abline(v = 72)
# Third, the polygon outline: along the density curve from 60 to 72, closed
# off on the x-axis (y = 0) at both ends.
cord.x <- c(60, seq(60, 72, 1), 72)
cord.y <- c(0, dnorm(seq(60, 72, 1), 90, 10), 0)
?polygon
polygon(cord.x, cord.y, col = "skyblue")
# Label is computed from the p-value (0.0359) rather than typed in by hand.
text(70, 0.005, paste0("blue area = p = ", round(p_one_tailed, 4)))
# 2) Two-tailed test
# To ask how far a value deviates from the mean in either direction, mirror
# the shaded area onto the other side of the bell curve (108 = 90 + 18).
cord.x1 <- c(108, seq(108, 120, 1), 120)
cord.y1 <- c(0, dnorm(seq(108, 120, 1), 90, 10), 0)
polygon(cord.x1, cord.y1, col = "skyblue")
# Label each tail with its area and show the two-tailed p-value (0.072).
p_two_tailed <- 2 * p_one_tailed
text(65, 0.005, round(p_one_tailed, 3))
text(115, 0.005, round(p_one_tailed, 3))
text(75, 0.02, paste0(" p = ", round(p_two_tailed, 3), " "))

# OTHER DISTRIBUTIONS
# Student's t distribution
help("dt")
# Poisson distribution
help("dpois")
# Chi-squared distribution
help("dchisq")
	#Probability Distributions in R


	# BINOMIAL DISTRIBUTION
	# Show the help page for the binomial density function.
	help("dbinom")
	# Possible success counts for 30 trials: the integers 0 through 30.
	x <- seq.int(from = 0L, to = 30L)
	# Binomial probability mass function (30 trials, success probability 0.5),
	# drawn as vertical bars (type = "h").
	plot(x = x, y = dbinom(x, 30, 0.5), type = "h")
	# Binomial cumulative distribution function for the same parameters.
	plot(x = x, y = pbinom(x, 30, 0.5), type = "h")


	# NORMAL DISTRIBUTION
	# Standard normal (z) distribution.
	# Grid for the x-axis: -4 to 4 in steps of 0.01.
	x <- seq(from = -4, to = 4, by = 0.01)
	# Density curve: type = "l" joins the points with a line, xlim fixes the
	# horizontal range that is shown, and main sets the plot title.
	plot(x = x, y = dnorm(x), main = "Bell Curve", xlim = c(-4, 4), type = "l")
	# The same picture without a precomputed grid: curve() evaluates the
	# expression itself between `from` and `to`.
	curve(expr = dnorm(x), from = -4, to = 4, main = "Bell Curve", xlim = c(-4, 4))
	# Cumulative distribution function.
	plot(x = x, y = pnorm(x), type = "l")
	# Quantile function: the z value at the 25th percentile.
	qnorm(p = 0.25)
	# Bounds of the central interval that holds 95% of the distribution
	# (useful when calculating confidence intervals).
	qnorm(p = c(0.025, 0.975))

	# Example: Blood Pressure
	# Grid for the x-axis: blood pressure from 60 to 120 mmHg in steps of 1.
	bt <- seq(60, 120, 1)
	# Density of a normal distribution with mean 90 mmHg and standard deviation
	# 10 mmHg.
	plot(bt, dnorm(bt, 90, 10), type = "l", xlim = c(60, 120), main = "Blood Pressure")
	# Cumulative distribution function - pnorm().
	# Plotted on the blood-pressure grid with the same mean and standard deviation
	# as the density above (not on the standard-normal grid `x`).
	plot(bt, pnorm(bt, 90, 10), type = "l")
	# pnorm() gives the proportion of people whose blood pressure is 80 mmHg or
	# BELOW, given a population mean of 90 and a standard deviation of 10.
	pnorm(q = 80, mean = 90, sd = 10)
	# Alternatively, with positional arguments (q, mean, sd).
	pnorm(80, 90, 10)
	# Rounding to the closest integer (a probability below 0.5 becomes 0).
	round(pnorm(80, 90, 10))
	# Rounding to two decimal places.
	round(pnorm(80, 90, 10), 2)

	# Statistical Tests in Normal Distribution
	# Use the normal distribution of blood pressure (mean 90 mmHg, sd 10 mmHg)
	# to illustrate one- and two-tailed z-tests.
	# abline(), polygon() and text() draw on the current plot, so the density
	# curve is drawn again here; otherwise the annotations would land on
	# whichever plot happened to be drawn last.
	bt <- seq(60, 120, 1)
	plot(bt, dnorm(bt, 90, 10), type = "l", xlim = c(60, 120), main = "Blood Pressure")
	# 1) One-tailed test
	# First, the probability of randomly selecting a subject with a blood pressure
	# of 72 mmHg or lower: the p-value, equal to the area of the polygon drawn
	# below.
	p_one_tailed <- pnorm(72, 90, 10)
	p_one_tailed
	# Second, a vertical line at 72 (v is the x-value).
	abline(v = 72)
	# Third, the polygon outline: along the density curve from 60 to 72, closed
	# off on the x-axis (y = 0) at both ends.
	cord.x <- c(60, seq(60, 72, 1), 72)
	cord.y <- c(0, dnorm(seq(60, 72, 1), 90, 10), 0)
	?polygon
	polygon(cord.x, cord.y, col = "skyblue")
	# Label is computed from the p-value (0.0359) rather than typed in by hand.
	text(70, 0.005, paste0("blue area = p = ", round(p_one_tailed, 4)))
	# 2) Two-tailed test
	# To ask how far a value deviates from the mean in either direction, mirror
	# the shaded area onto the other side of the bell curve (108 = 90 + 18).
	cord.x1 <- c(108, seq(108, 120, 1), 120)
	cord.y1 <- c(0, dnorm(seq(108, 120, 1), 90, 10), 0)
	polygon(cord.x1, cord.y1, col = "skyblue")
	# Label each tail with its area and show the two-tailed p-value (0.072).
	p_two_tailed <- 2 * p_one_tailed
	text(65, 0.005, round(p_one_tailed, 3))
	text(115, 0.005, round(p_one_tailed, 3))
	text(75, 0.02, paste0(" p = ", round(p_two_tailed, 3), " "))

	# OTHER DISTRIBUTIONS
	# Student's t distribution
	help("dt")
	# Poisson distribution
	help("dpois")
	# Chi-squared distribution
	help("dchisq")