Skip to content

Instantly share code, notes, and snippets.

@hoelzro
Created June 15, 2016 12:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hoelzro/ec31e1e94f179ce81e7ff1e5d33513eb to your computer and use it in GitHub Desktop.
Save hoelzro/ec31e1e94f179ce81e7ff1e5d33513eb to your computer and use it in GitHub Desktop.
#!/usr/bin/env Rscript
# Multiply multiplier * expr, but only
# evaluate expr if multiplier is non-zero.
cond_mult <- function(multiplier, expr) {
if(multiplier != 0) {
return(multiplier * expr)
} else {
return(0)
}
}
# calculate the number of expected steps executing
# the algorithm should take
calculate_expected_steps <- function(total, num_bad) {
# if we have no items, no steps are needed
if(total == 0) {
return(0)
}
# if all items are bad, no steps are needed
if(total == num_bad) {
return(0)
}
# if we have exactly one bad item, we can divide and conquer
# in log2(n) steps to figure out which is bad
if(num_bad == 1) {
return(log2(total))
}
# if we have exactly one *good* item, we can use the same
# logic as above to determine which it is, therefore all
# others are bad
if((num_bad + 1) == total) {
return(log2(total))
}
sample_size <- ceiling(total / 2)
remainder <- total - sample_size
num_good <- total - num_bad
# simulate taking half the items and testing them.
# using a hypergeometric distribution (sampling items without
# putting them back), we can determine the probability
# of a given number occurring within that half
p <- dhyper(0:num_bad, m=num_bad, n=num_good, k=sample_size)
# for the case where we found no bad items, we can discard
# the half we just tested without additional work. We continue
# looking for all of the bad items in the half we didn't test
steps <- cond_mult(p[1], calculate_expected_steps(remainder, num_bad))
for(i in 2:num_bad) {
# we found n (i-1) bad items in the tested half. look for that
# number of items in the tested half, and the remaining bad
# items in the untested half
steps <- append(steps, cond_mult(p[i],
calculate_expected_steps(sample_size, i - 1) +
calculate_expected_steps(remainder, num_bad - (i - 1))))
}
# if we found all of the bad items, we can discard the half we
# didn't test without additional work. We continue looking for
# the bad items in the half we *did* test
steps <- append(steps, cond_mult(p[num_bad+1],
calculate_expected_steps(sample_size, num_bad)))
return(1 + sum(steps))
}
args <- commandArgs(trailingOnly=T)
total <- as.integer(args[1])
num_bad <- as.integer(args[2])
steps <- calculate_expected_steps(total, num_bad)
print(steps)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment