hoelzro/binary-strategy-steps.R

## binary-strategy-steps.R
#!/usr/bin/env Rscript

# Multiply multiplier * expr, but only
# evaluate expr if multiplier is non-zero.
cond_mult <- function(multiplier, expr) {
    if(multiplier != 0) {
        return(multiplier * expr)
    } else {
        return(0)
    }
}

# calculate the number of expected steps executing
# the algorithm should take
calculate_expected_steps <- function(total, num_bad) {
    # if we have no items, no steps are needed
    if(total == 0) {
        return(0)
    }

    # if all items are bad, no steps are needed
    if(total == num_bad) {
        return(0)
    }

    # if we have exactly one bad item, we can divide and conquer
    # in log2(n) steps to figure out which is bad
    if(num_bad == 1) {
        return(log2(total))
    }

    # if we have exactly one *good* item, we can use the same
    # logic as above to determine which it is, therefore all
    # others are bad
    if((num_bad + 1) == total) {
        return(log2(total))
    }

    sample_size <- ceiling(total / 2)
    remainder <- total - sample_size
    num_good <- total - num_bad

    # simulate taking half the items and testing them.
    # using a hypergeometric distribution (sampling items without
    # putting them back), we can determine the probability
    # of a given number occurring within that half
    p <- dhyper(0:num_bad, m=num_bad, n=num_good, k=sample_size)

    # for the case where we found no bad items, we can discard
    # the half we just tested without additional work. We continue
    # looking for all of the bad items in the half we didn't test
    steps <- cond_mult(p[1], calculate_expected_steps(remainder, num_bad))

    for(i in 2:num_bad) {
        # we found n (i-1) bad items in the tested half. look for that
        # number of items in the tested half, and the remaining bad
        # items in the untested half
        steps <- append(steps, cond_mult(p[i],
            calculate_expected_steps(sample_size, i - 1) +
            calculate_expected_steps(remainder, num_bad - (i - 1))))
    }

    # if we found all of the bad items, we can discard the half we
    # didn't test without additional work.  We continue looking for
    # the bad items in the half we *did* test
    steps <- append(steps, cond_mult(p[num_bad+1],
        calculate_expected_steps(sample_size, num_bad)))

    return(1 + sum(steps))
}

args <- commandArgs(trailingOnly=T)
total <- as.integer(args[1])
num_bad <- as.integer(args[2])

steps <- calculate_expected_steps(total, num_bad)
print(steps)
	#!/usr/bin/env Rscript

	# Multiply multiplier * expr, but only
	# evaluate expr if multiplier is non-zero.
	cond_mult <- function(multiplier, expr) {
	if(multiplier != 0) {
	return(multiplier * expr)
	} else {
	return(0)
	}
	}

	# calculate the number of expected steps executing
	# the algorithm should take
	calculate_expected_steps <- function(total, num_bad) {
	# if we have no items, no steps are needed
	if(total == 0) {
	return(0)
	}

	# if all items are bad, no steps are needed
	if(total == num_bad) {
	return(0)
	}

	# if we have exactly one bad item, we can divide and conquer
	# in log2(n) steps to figure out which is bad
	if(num_bad == 1) {
	return(log2(total))
	}

	# if we have exactly one good item, we can use the same
	# logic as above to determine which it is, therefore all
	# others are bad
	if((num_bad + 1) == total) {
	return(log2(total))
	}

	sample_size <- ceiling(total / 2)
	remainder <- total - sample_size
	num_good <- total - num_bad

	# simulate taking half the items and testing them.
	# using a hypergeometric distribution (sampling items without
	# putting them back), we can determine the probability
	# of a given number occurring within that half
	p <- dhyper(0:num_bad, m=num_bad, n=num_good, k=sample_size)

	# for the case where we found no bad items, we can discard
	# the half we just tested without additional work. We continue
	# looking for all of the bad items in the half we didn't test
	steps <- cond_mult(p[1], calculate_expected_steps(remainder, num_bad))

	for(i in 2:num_bad) {
	# we found n (i-1) bad items in the tested half. look for that
	# number of items in the tested half, and the remaining bad
	# items in the untested half
	steps <- append(steps, cond_mult(p[i],
	calculate_expected_steps(sample_size, i - 1) +
	calculate_expected_steps(remainder, num_bad - (i - 1))))
	}

	# if we found all of the bad items, we can discard the half we
	# didn't test without additional work. We continue looking for
	# the bad items in the half we did test
	steps <- append(steps, cond_mult(p[num_bad+1],
	calculate_expected_steps(sample_size, num_bad)))

	return(1 + sum(steps))
	}

	args <- commandArgs(trailingOnly=T)
	total <- as.integer(args[1])
	num_bad <- as.integer(args[2])

	steps <- calculate_expected_steps(total, num_bad)
	print(steps)