datawookie/moving_checksum.R

## moving_checksum.R
library(dplyr)
library(ggplot2)
library(scales)

checksum <- function(bytes) {
	Reduce(bitwXor, bytes)
}

# Generate message with:
#
# - payload
# - CRC byte (based payload but not CMD or FUN)
#
generate.message <- function(length) {
	payload = sample(0:255, length, replace = TRUE)
	#
	c(payload, checksum(payload))
}

# ---------------------------------------------------------------------------------------------------------------------

NEXP = 1000

failure.rate <- function(length) {
	failures = replicate(NEXP, {
		msg = generate.message(length)
		#
		# Extract payload.
		#
		payload = msg[1:length(msg)-1]
		#
		crc = sapply(1:length(payload), function(n) {checksum(payload[1:n])})
		#
		# Find the number of times that the checksum is coincidentally the same as the next number in the payload.
		#
		# payload: 154  59 161 111 127 182 227  37   8 170
		# CRC:         154 161   0 111  16 166  69  96 104 194
		#
		# For example, above the checksum after the first two bytes is the same as the next number in the payload.
		#
		sum(payload[-1] == crc[-length]) > 0
	})
	#
	sum(failures) / NEXP
}

n = 2^(1:10)
rates = sapply(n, failure.rate)

failures = data.frame(n, rates)

alpha = 0.05
Z = qnorm(1 - alpha / 2)
#
# Calculating confidence interval using Normal Approximation. Could do this more rigorously, but this will suffice.
#
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
#
failures = mutate(failures,
	rates.min = rates - Z * sqrt(rates * (1 - rates) / NEXP),
	rates.max = rates + Z * sqrt(rates * (1 - rates) / NEXP)
)

# Theoretical probabilities courtesy of Christine Swart.
#
theory = data.frame(n = 1:1024) %>% transform(theory = 1 - (1 - 1/256)^(n-1))

ggplot(na.omit(failures), aes(x = n)) +
	geom_ribbon(aes(ymin = rates.min, ymax = rates.max), fill = "#ff931f", alpha = 0.65) +
	geom_line(aes(y = rates), color = "#2188e0", lwd = 1) +
	geom_line(data = theory, aes(x = n, y = theory), lty = "dashed", lwd = 1) +
	labs(x = "Payload Length [bytes]", y = "Error Probability") +
	scale_y_continuous(labels = percent) +
	theme_classic()
	library(dplyr)
	library(ggplot2)
	library(scales)

	checksum <- function(bytes) {
	Reduce(bitwXor, bytes)
	}

	# Generate message with:
	#
	# - payload
	# - CRC byte (based payload but not CMD or FUN)
	#
	generate.message <- function(length) {
	payload = sample(0:255, length, replace = TRUE)
	#
	c(payload, checksum(payload))
	}

	# ---------------------------------------------------------------------------------------------------------------------

	NEXP = 1000

	failure.rate <- function(length) {
	failures = replicate(NEXP, {
	msg = generate.message(length)
	#
	# Extract payload.
	#
	payload = msg[1:length(msg)-1]
	#
	crc = sapply(1:length(payload), function(n) {checksum(payload[1:n])})
	#
	# Find the number of times that the checksum is coincidentally the same as the next number in the payload.
	#
	# payload: 154 59 161 111 127 182 227 37 8 170
	# CRC: 154 161 0 111 16 166 69 96 104 194
	#
	# For example, above the checksum after the first two bytes is the same as the next number in the payload.
	#
	sum(payload[-1] == crc[-length]) > 0
	})
	#
	sum(failures) / NEXP
	}

	n = 2^(1:10)
	rates = sapply(n, failure.rate)

	failures = data.frame(n, rates)

	alpha = 0.05
	Z = qnorm(1 - alpha / 2)
	#
	# Calculating confidence interval using Normal Approximation. Could do this more rigorously, but this will suffice.
	#
	# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
	#
	failures = mutate(failures,
	rates.min = rates - Z * sqrt(rates * (1 - rates) / NEXP),
	rates.max = rates + Z * sqrt(rates * (1 - rates) / NEXP)
	)

	# Theoretical probabilities courtesy of Christine Swart.
	#
	theory = data.frame(n = 1:1024) %>% transform(theory = 1 - (1 - 1/256)^(n-1))

	ggplot(na.omit(failures), aes(x = n)) +
	geom_ribbon(aes(ymin = rates.min, ymax = rates.max), fill = "#ff931f", alpha = 0.65) +
	geom_line(aes(y = rates), color = "#2188e0", lwd = 1) +
	geom_line(data = theory, aes(x = n, y = theory), lty = "dashed", lwd = 1) +
	labs(x = "Payload Length [bytes]", y = "Error Probability") +
	scale_y_continuous(labels = percent) +
	theme_classic()