Skip to content

Instantly share code, notes, and snippets.

@datawookie
Last active December 29, 2015 06:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save datawookie/e5a80e1c8abc35bd5093 to your computer and use it in GitHub Desktop.
Save datawookie/e5a80e1c8abc35bd5093 to your computer and use it in GitHub Desktop.
Rolling Checksum
library(dplyr)
library(ggplot2)
library(scales)
checksum <- function(bytes) {
Reduce(bitwXor, bytes)
}
# Generate message with:
#
# - payload
# - CRC byte (based payload but not CMD or FUN)
#
generate.message <- function(length) {
payload = sample(0:255, length, replace = TRUE)
#
c(payload, checksum(payload))
}
# ---------------------------------------------------------------------------------------------------------------------
NEXP = 1000
failure.rate <- function(length) {
failures = replicate(NEXP, {
msg = generate.message(length)
#
# Extract payload.
#
payload = msg[1:length(msg)-1]
#
crc = sapply(1:length(payload), function(n) {checksum(payload[1:n])})
#
# Find the number of times that the checksum is coincidentally the same as the next number in the payload.
#
# payload: 154 59 161 111 127 182 227 37 8 170
# CRC: 154 161 0 111 16 166 69 96 104 194
#
# For example, above the checksum after the first two bytes is the same as the next number in the payload.
#
sum(payload[-1] == crc[-length]) > 0
})
#
sum(failures) / NEXP
}
n = 2^(1:10)
rates = sapply(n, failure.rate)
failures = data.frame(n, rates)
alpha = 0.05
Z = qnorm(1 - alpha / 2)
#
# Calculating confidence interval using Normal Approximation. Could do this more rigorously, but this will suffice.
#
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
#
failures = mutate(failures,
rates.min = rates - Z * sqrt(rates * (1 - rates) / NEXP),
rates.max = rates + Z * sqrt(rates * (1 - rates) / NEXP)
)
# Theoretical probabilities courtesy of Christine Swart.
#
theory = data.frame(n = 1:1024) %>% transform(theory = 1 - (1 - 1/256)^(n-1))
ggplot(na.omit(failures), aes(x = n)) +
geom_ribbon(aes(ymin = rates.min, ymax = rates.max), fill = "#ff931f", alpha = 0.65) +
geom_line(aes(y = rates), color = "#2188e0", lwd = 1) +
geom_line(data = theory, aes(x = n, y = theory), lty = "dashed", lwd = 1) +
labs(x = "Payload Length [bytes]", y = "Error Probability") +
scale_y_continuous(labels = percent) +
theme_classic()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment