# mlopatka/BPCI.R Secret

## BPCI.R
library(stats)

# Generate random data frames of hypothetical performance.
#  - each row corresponds to the performance of a fathom rule set on a single
#    page
#  - each column (X1..X4) corresponds to the success or failure of correctly
#    identifying a single feature (price, image, title, description); a value
#    of 1 is treated as a success and 0 as a failure
# Several fathom rule sets are evaluated side by side by tagging each data
# frame with a `recipe` index column before stacking them.
fathom_classification_strategy1 <- data.frame(
  replicate(4, sample(0:1, 100, replace = TRUE))
)
fathom_classification_strategy2 <- data.frame(
  replicate(4, sample(0:1, 100, replace = TRUE))
)

# Add an indication of the fathom recipe.
fathom_classification_strategy1[, "recipe"] <- "fathom_recipe_1"
fathom_classification_strategy2[, "recipe"] <- "fathom_recipe_2"

# Concatenate the data from multiple experiments.
exp_data <- rbind(
  fathom_classification_strategy1,
  fathom_classification_strategy2
)

# Set alpha: the two-sided significance level (0.05 gives a 95% interval).
alpha <- 0.05

# Cast the column (feature) of interest as a factor. Both levels are declared
# explicitly so the table below always has a "0" and a "1" cell, even when one
# of the outcomes never occurs in the sample.
feature_1 <- factor(exp_data$X1, levels = c(0, 1))

# Count the number of points.
n <- length(feature_1)

# Cast it to a table for handy builtin functions.
as_table <- table(feature_1)

# Compute the proportion of correct classifications. The cell is selected by
# its label ("1" = success) rather than by position: position 1 is the "0"
# cell, which would give the failure rate instead.
p_hat <- as_table[["1"]] / n

# Calculate the critical z-score for a two-sided interval.
z <- qnorm(1 - alpha / 2)

# Compute the normal-approximation (Wald) confidence interval for that
# performance: p_hat +/- z * sqrt(p_hat * (1 - p_hat) / n).
# Note: this interval is not clipped and can fall outside [0, 1] when p_hat is
# close to 0 or 1.
p_hat_ci <- p_hat + c(-1, 1) * z * sqrt(p_hat * (1 - p_hat) / n)

# Show the point estimate and its interval.
p_hat
p_hat_ci
	library(stats)

	# Generate random data frames of hypothetical performance.
	#  - each row corresponds to the performance of a fathom rule set on a
	#    single page
	#  - each column (X1..X4) corresponds to the success or failure of correctly
	#    identifying a single feature (price, image, title, description); a
	#    value of 1 is treated as a success and 0 as a failure
	# Several fathom rule sets are evaluated side by side by tagging each data
	# frame with a `recipe` index column before stacking them.
	fathom_classification_strategy1 <- data.frame(
		replicate(4, sample(0:1, 100, replace = TRUE))
	)
	fathom_classification_strategy2 <- data.frame(
		replicate(4, sample(0:1, 100, replace = TRUE))
	)

	# Add an indication of the fathom recipe.
	fathom_classification_strategy1[, "recipe"] <- "fathom_recipe_1"
	fathom_classification_strategy2[, "recipe"] <- "fathom_recipe_2"

	# Concatenate the data from multiple experiments.
	exp_data <- rbind(
		fathom_classification_strategy1,
		fathom_classification_strategy2
	)

	# Set alpha: the two-sided significance level (0.05 gives a 95% interval).
	alpha <- 0.05

	# Cast the column (feature) of interest as a factor. Both levels are
	# declared explicitly so the table below always has a "0" and a "1" cell,
	# even when one of the outcomes never occurs in the sample.
	feature_1 <- factor(exp_data$X1, levels = c(0, 1))

	# Count the number of points.
	n <- length(feature_1)

	# Cast it to a table for handy builtin functions.
	as_table <- table(feature_1)

	# Compute the proportion of correct classifications. The cell is selected
	# by its label ("1" = success) rather than by position: position 1 is the
	# "0" cell, which would give the failure rate instead.
	p_hat <- as_table[["1"]] / n

	# Calculate the critical z-score for a two-sided interval.
	z <- qnorm(1 - alpha / 2)

	# Compute the normal-approximation (Wald) confidence interval for that
	# performance: p_hat +/- z * sqrt(p_hat * (1 - p_hat) / n).
	# The multiplication operators must be written out explicitly; without
	# them the expression does not parse.
	p_hat_ci <- p_hat + c(-1, 1) * z * sqrt(p_hat * (1 - p_hat) / n)

	# Show the point estimate and its interval.
	p_hat
	p_hat_ci