## Mathias-Fuchs/estSquareOfMean.R

## estSquareOfMean.R
                                        # our sample size
n <- 10

# index pairs: each column of `co` is one 2-element subset of 1..n
co <- combn(n, 2)

# Monte Carlo check of the bias of the false friend.
# f and g collect, per simulated sample, the value of the false friend
# and of the good friend respectively.
f <- numeric(50000)
g <- numeric(50000)

for (k in seq_len(50000)) {
    # progress indicator every 100 iterations
    if (k %% 100 == 0) {
        print(k)
    }

    # n draws with replacement from 1..6
    s <- sample(6, n, replace = TRUE)

    # the false friend: the squared sample mean
    F <- mean(s)^2
    f[k] <- F

    # the good friend: the product of the two selected observations,
    # averaged over all index pairs stored in the columns of `co`
    G <- mean(vapply(
        seq_len(ncol(co)),
        function(j) prod(s[co[, j]]),
        numeric(1)
    ))
    g[k] <- G
}

# The confidence interval reported for f is expected not to contain the
# true value 12.25 (the false friend is biased) ...
print(t.test(f))
# ... whereas the one reported for g is expected to contain 12.25.
print(t.test(g))


                                        # let's look at the particular example
s <- c(2, 2, 4, 6, 2, 6, 4, 5, 4, 6)
n <- length(s)

# G: product of the selected observations, averaged over all 2-element subsets
G <- mean(combn(s, 2, FUN = prod))
# H: the same average taken over all 4-element subsets
H <- mean(combn(s, 4, FUN = prod))

# K is used below as the squared standard error of G
K <- G^2 - H

# two-sided 95% Student t quantile with n - 1 degrees of freedom
t <- qt(1 - 0.05 / 2, df = n - 1)

# print the interval G -/+ t * sqrt(K) as "[lower, upper]"
print(paste0("[", paste(G + c(-1, 1) * t * sqrt(K), collapse = ", "), "]"))


                                        # what happens on a larger sample size?
n <- 200
s <- sample(6, n, replace = TRUE)

# Mean of prod(x[idx]) over all size-k index subsets idx of seq_along(x).
#
# The sum of those products is the k-th elementary symmetric polynomial of
# x. It is accumulated with the recurrence e_j <- e_j + x_i * e_{j-1}, which
# costs O(length(x) * k) instead of enumerating all choose(length(x), k)
# subsets. Enumerating them is infeasible here: choose(200, 4) is
# 64,684,950 columns for combn() to materialise.
#
# x: numeric vector of observations.
# k: subset size, an integer with 0 <= k <= length(x).
# Returns a single numeric value.
mean_subset_product <- function(x, k) {
    stopifnot(is.numeric(x), length(k) == 1, k >= 0, k <= length(x))
    # e[j + 1] holds the j-th elementary symmetric polynomial of the
    # elements of x processed so far (e_0 = 1)
    e <- c(1, numeric(k))
    if (k >= 1) {
        for (xi in x) {
            # the right-hand side is evaluated from the old e before assignment
            e[-1] <- e[-1] + xi * e[-(k + 1)]
        }
    }
    e[k + 1] / choose(length(x), k)
}

# G: product of the selected observations, averaged over all 2-element subsets
G <- mean_subset_product(s, 2)
# H: the same average taken over all 4-element subsets
H <- mean_subset_product(s, 4)

# K is used below as the squared standard error of G
K <- G^2 - H

# two-sided 95% Student t quantile with n - 1 degrees of freedom
t <- qt(1 - 0.05 / 2, df = n - 1)

# print the interval G -/+ t * sqrt(K) as "[lower, upper]"
print(paste0("[", G - t * sqrt(K), ", ", G + t * sqrt(K), "]"))
	# our sample size
	n <- 10

	# index pairs: each column of `co` is one 2-element subset of 1..n
	co <- combn(n, 2)

	# Monte Carlo check of the bias of the false friend.
	# f and g collect, per simulated sample, the value of the false friend
	# and of the good friend respectively.
	f <- numeric(50000)
	g <- numeric(50000)

	for (k in seq_len(50000)) {
		# progress indicator every 100 iterations
		if (k %% 100 == 0) {
			print(k)
		}

		# n draws with replacement from 1..6
		s <- sample(6, n, replace = TRUE)

		# the false friend: the squared sample mean
		F <- mean(s)^2
		f[k] <- F

		# the good friend: the product of the two selected observations,
		# averaged over all index pairs stored in the columns of `co`
		G <- mean(vapply(
			seq_len(ncol(co)),
			function(j) prod(s[co[, j]]),
			numeric(1)
		))
		g[k] <- G
	}

	# The confidence interval reported for f is expected not to contain the
	# true value 12.25 (the false friend is biased) ...
	print(t.test(f))
	# ... whereas the one reported for g is expected to contain 12.25.
	print(t.test(g))



	# let's look at the particular example
	s <- c(2, 2, 4, 6, 2, 6, 4, 5, 4, 6)
	n <- length(s)

	# G: product of the selected observations, averaged over all 2-element subsets
	G <- mean(combn(s, 2, FUN = prod))
	# H: the same average taken over all 4-element subsets
	H <- mean(combn(s, 4, FUN = prod))

	# K is used below as the squared standard error of G
	K <- G^2 - H

	# two-sided 95% Student t quantile with n - 1 degrees of freedom
	t <- qt(1 - 0.05 / 2, df = n - 1)

	# print the interval G -/+ t * sqrt(K) as "[lower, upper]"
	print(paste0("[", paste(G + c(-1, 1) * t * sqrt(K), collapse = ", "), "]"))


	# what happens on a larger sample size?
	n <- 200
	s <- sample(6, n, replace = TRUE)

	# Mean of prod(x[idx]) over all size-k index subsets idx of seq_along(x).
	#
	# The sum of those products is the k-th elementary symmetric polynomial
	# of x. It is accumulated with the recurrence e_j <- e_j + x_i * e_{j-1},
	# which costs O(length(x) * k) instead of enumerating all
	# choose(length(x), k) subsets. Enumerating them is infeasible here:
	# choose(200, 4) is 64,684,950 columns for combn() to materialise.
	#
	# x: numeric vector of observations.
	# k: subset size, an integer with 0 <= k <= length(x).
	# Returns a single numeric value.
	mean_subset_product <- function(x, k) {
		stopifnot(is.numeric(x), length(k) == 1, k >= 0, k <= length(x))
		# e[j + 1] holds the j-th elementary symmetric polynomial of the
		# elements of x processed so far (e_0 = 1)
		e <- c(1, numeric(k))
		if (k >= 1) {
			for (xi in x) {
				# the right-hand side is evaluated from the old e before assignment
				e[-1] <- e[-1] + xi * e[-(k + 1)]
			}
		}
		e[k + 1] / choose(length(x), k)
	}

	# G: product of the selected observations, averaged over all 2-element subsets
	G <- mean_subset_product(s, 2)
	# H: the same average taken over all 4-element subsets
	H <- mean_subset_product(s, 4)

	# K is used below as the squared standard error of G
	K <- G^2 - H

	# two-sided 95% Student t quantile with n - 1 degrees of freedom
	t <- qt(1 - 0.05 / 2, df = n - 1)

	# print the interval G -/+ t * sqrt(K) as "[lower, upper]"
	print(paste0("[", G - t * sqrt(K), ", ", G + t * sqrt(K), "]"))