Created
June 15, 2016 12:24
-
-
Save hoelzro/ec31e1e94f179ce81e7ff1e5d33513eb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript | |
# Multiply multiplier * expr, but only | |
# evaluate expr if multiplier is non-zero. | |
cond_mult <- function(multiplier, expr) { | |
if(multiplier != 0) { | |
return(multiplier * expr) | |
} else { | |
return(0) | |
} | |
} | |
# calculate the number of expected steps executing | |
# the algorithm should take | |
calculate_expected_steps <- function(total, num_bad) { | |
# if we have no items, no steps are needed | |
if(total == 0) { | |
return(0) | |
} | |
# if all items are bad, no steps are needed | |
if(total == num_bad) { | |
return(0) | |
} | |
# if we have exactly one bad item, we can divide and conquer | |
# in log2(n) steps to figure out which is bad | |
if(num_bad == 1) { | |
return(log2(total)) | |
} | |
# if we have exactly one *good* item, we can use the same | |
# logic as above to determine which it is, therefore all | |
# others are bad | |
if((num_bad + 1) == total) { | |
return(log2(total)) | |
} | |
sample_size <- ceiling(total / 2) | |
remainder <- total - sample_size | |
num_good <- total - num_bad | |
# simulate taking half the items and testing them. | |
# using a hypergeometric distribution (sampling items without | |
# putting them back), we can determine the probability | |
# of a given number occurring within that half | |
p <- dhyper(0:num_bad, m=num_bad, n=num_good, k=sample_size) | |
# for the case where we found no bad items, we can discard | |
# the half we just tested without additional work. We continue | |
# looking for all of the bad items in the half we didn't test | |
steps <- cond_mult(p[1], calculate_expected_steps(remainder, num_bad)) | |
for(i in 2:num_bad) { | |
# we found n (i-1) bad items in the tested half. look for that | |
# number of items in the tested half, and the remaining bad | |
# items in the untested half | |
steps <- append(steps, cond_mult(p[i], | |
calculate_expected_steps(sample_size, i - 1) + | |
calculate_expected_steps(remainder, num_bad - (i - 1)))) | |
} | |
# if we found all of the bad items, we can discard the half we | |
# didn't test without additional work. We continue looking for | |
# the bad items in the half we *did* test | |
steps <- append(steps, cond_mult(p[num_bad+1], | |
calculate_expected_steps(sample_size, num_bad))) | |
return(1 + sum(steps)) | |
} | |
args <- commandArgs(trailingOnly=T) | |
total <- as.integer(args[1]) | |
num_bad <- as.integer(args[2]) | |
steps <- calculate_expected_steps(total, num_bad) | |
print(steps) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment