# jpcarrascal/survey_sample_size.R

## survey_sample_size.R
# Simple function to determine optimum sample size for a survey.
# A lot of websites provide online calculators, but here is the code so you can do it yourself in R.
# Source:
#     Krejcie, Robert V., and Daryle W. Morgan. "Determining sample size for research activities."
#     Educational and psychological measurement 30.3 (1970): 607-610.
#     https://journals.sagepub.com/doi/abs/10.1177/001316447003000308?journalCode=epma
#     Downloadable PDF: https://home.kku.ac.th/sompong/guest_speaker/KrejcieandMorgan_article.pdf
#
# The paper provides a table with chi-squared distribution values for 1 degree of freedom.
# R function qchisq() generates the right value from a given confidence level, so the table is not needed.
# Function input parameters:
#
# N: population size
# err: desired margin of error. E.g. for 5%,  err = 0.05
# cl: confidence level. E.g. for 95%, cl = 0.95
# P: response distribution (population proportion in the source). 50% is often used because it yields the largest sample size.
#
# Written by JP Carrascal: www.jpcarrascal.com, www.github.com/jpcarrascal
#

# Sample size for a finite population, following Krejcie & Morgan (1970).
#
# N:   population size
# err: margin of error expressed as a proportion (e.g. 0.05 for 5%)
# cl:  confidence level expressed as a proportion (e.g. 0.95 for 95%)
# P:   assumed population proportion (defaults to 0.5)
#
# Returns the sample size exactly as the formula yields it (not rounded).
sample_size <- function(N, err, cl, P = 0.5) {
  # Chi-squared quantile with 1 degree of freedom at the requested confidence.
  chi_sq <- qchisq(cl, df = 1)

  numerator <- chi_sq * N * P * (1 - P)
  denominator <- err^2 * (N - 1) + chi_sq * P * (1 - P)

  numerator / denominator
}
	# Simple function to determine optimum sample size for a survey.
	# A lot of websites provide online calculators, but here is the code so you can do it yourself in R.
	# Source:
	# Krejcie, Robert V., and Daryle W. Morgan. "Determining sample size for research activities."
	# Educational and psychological measurement 30.3 (1970): 607-610.
	# https://journals.sagepub.com/doi/abs/10.1177/001316447003000308?journalCode=epma
	# Downloadable PDF: https://home.kku.ac.th/sompong/guest_speaker/KrejcieandMorgan_article.pdf
	#
	# The paper provides a table with chi-squared distribution values for 1 degree of freedom.
	# R function qchisq() generates the right value from a given confidence level, so the table is not needed.
	# Function input parameters:
	#
	# N: population size
	# err: desired margin of error. E.g. for 5%, err = 0.05
	# cl: confidence level. E.g. for 95%, cl = 0.95
	# P: response distribution (population proportion in the source). 50% is often used because it yields the largest sample size.
	#
	# Written by JP Carrascal: www.jpcarrascal.com, www.github.com/jpcarrascal
	#

	# Sample size for a finite population, following Krejcie & Morgan (1970).
	#
	# N:   population size
	# err: margin of error expressed as a proportion (e.g. 0.05 for 5%)
	# cl:  confidence level expressed as a proportion (e.g. 0.95 for 95%)
	# P:   assumed population proportion (defaults to 0.5)
	#
	# Returns the sample size exactly as the formula yields it (not rounded).
	sample_size <- function(N, err, cl, P=0.5)
	{
	  # Chi-squared quantile with 1 degree of freedom at the requested confidence.
	  chisqV <- qchisq(cl, df=1)
	  # Every product needs an explicit `*`: R has no implicit multiplication,
	  # so `chisqVNP(1-P)` or `err^2(N-1)` would be parsed as function calls.
	  ssize <- ( chisqV*N*P*(1-P) ) / ( err^2*(N-1) + chisqV*P*(1-P) )
	  return ( ssize )
	}