## chrishanretty/s_v_sd_moe.R

## s_v_sd_moe.R
## How uncertain is the gap between the Social Democrats and the Sweden
## Democrats in the polls?
##
## Let's suppose that polls work as they are supposed to.
## Let's take the average of the last six polls.
## Let's call p_1 the poll average proportion voting for the Social Democrats
## and p_2 the poll average proportion voting for the Sweden Democrats.
p_1 <- 0.246
p_2 <- 0.189

## The standard error for each proportion is equal to the square root
## of the proportion times one minus the proportion, divided by the
## sample size. Let's say that we have an "as-if" sample size of
## 1,000. Let's call the standard errors se_1 and se_2.
n <- 1000
se_1 <- sqrt((p_1 * (1 - p_1)) / n)
se_2 <- sqrt((p_2 * (1 - p_2)) / n)

## The two proportions come from the same respondents: every respondent
## who picks one party is a respondent who did not pick the other. The
## two estimates are therefore negatively correlated, with covariance
## -p_1 * p_2 / n (the multinomial covariance).
cov_12 <- -(p_1 * p_2) / n

## The variance of a difference is Var(p_1) + Var(p_2) - 2 * Cov(p_1, p_2),
## so the standard error of the difference is the square root of that.
## Leaving the covariance out (i.e. treating the two proportions as if
## they came from independent samples) would understate the uncertainty
## in the lead.
comb_se <- sqrt(se_1^2 + se_2^2 - 2 * cov_12)

## This is higher than the individual standard errors
comb_se > se_1
comb_se > se_2

## Now let's work out the 95% confidence interval for the difference.
## This is approximately two standard errors in either direction.
lower <- (p_1 - p_2) - comb_se * 2
upper <- (p_1 - p_2) + comb_se * 2

## In fact, we can work out the probability of a result so extreme as
## to put the Sweden Democrats ahead by taking the difference in
## proportions, converting it to a z-value, and working out the
## probability of z-values that extreme or more, according to a normal
## distribution table.
zscore <- (p_1 - p_2) / comb_se
## pnorm = cumulative distribution function of the normal distribution;
## lower.tail = FALSE returns the upper-tail probability P(Z > zscore)
## directly, which is the same quantity as 1 - pnorm(zscore) without the
## subtraction.
p_sd_ahead <- pnorm(zscore, lower.tail = FALSE)
p_sd_ahead