Skip to content

Instantly share code, notes, and snippets.

@BroVic
Last active December 1, 2023 20:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BroVic/d02cd19f27c1deaa4ce9548dd9fd7c5b to your computer and use it in GitHub Desktop.
Save BroVic/d02cd19f27c1deaa4ce9548dd9fd7c5b to your computer and use it in GitHub Desktop.
Parse a vector of Electronic Product Codes (EPCs)
# Parse a vector of Electronic Product Codes (EPCs).
#
# Each string is read left to right as a run of components, every one of which
# starts with a 2-character lead:
#   "01" gtin   - fixed width: the next 14 characters
#   "17" date   - fixed width: the next 6 characters, read as yymmdd and
#                 reformatted as yyyy/mm/dd
#   "10" batch  - variable width: runs up to the first "21" that follows it,
#                 or to the end of the string when there is none
#   "21" serial - variable width: always runs to the end of the string; the
#                 stored value is upper-cased
#
# Four components are read from every string. If the string runs out early, or
# the next two characters are not one of the leads above, a warning describing
# the position is raised, followed by the error "Malformed EPC string".
#
# Argument:
#   epc - a character vector of EPC strings (anything else is an error)
#
# Value:
#   A data frame with the character columns `gtin`, `date`, `batch` and
#   `serial`, one row per input string. Rows holding an NA (for example a date
#   that cannot be parsed) are dropped, and a zero-length input gives a
#   zero-row data frame.
parseEPC <- function(epc) {
  if (!is.character(epc))
    stop("'epc' should be a character vector")

  leads <- c(gtin = "01", date = "17", batch = "10", serial = "21")
  fixed_width <- c(gtin = 14L, date = 6L)
  lead_width <- 2L

  ## Parse a single EPC string into a named character vector holding one
  ## element per component, in the same order as `leads`.
  parse_one <- function(code) {
    fields <- rep(NA_character_, length(leads))
    names(fields) <- names(leads)
    rest <- code
    # Kept per string so the diagnostic never reports a value that was
    # extracted from a previous element of `epc`.
    last_value <- NA

    for (i in seq_along(leads)) {
      lead <- substr(rest, 1L, lead_width)
      field <- names(leads)[match(lead, leads)]
      if (is.na(field)) {
        warning(
          "The last value extracted from ",
          sQuote(code),
          " was ",
          sQuote(last_value),
          " and the current lead digits are ",
          sQuote(lead),
          call. = FALSE
        )
        stop("Malformed EPC string", call. = FALSE)
      }

      payload <- substr(rest, lead_width + 1L, nchar(rest))
      raw <- if (field %in% names(fixed_width)) {
        substr(payload, 1L, fixed_width[[field]])
      } else if (field == "batch") {
        # Only the FIRST "21" delimits the batch. Looking for a single match
        # keeps the condition scalar even when "21" occurs several times.
        hit <- as.integer(regexpr("21", payload, fixed = TRUE))
        if (hit > 0L) substr(payload, 1L, hit - 1L) else payload
      } else {
        payload
      }

      fields[field] <- switch(
        field,
        date = raw |>
          as.Date(format = "%y%m%d") |>
          format("%Y/%m/%d"),
        serial = toupper(raw),
        raw
      )
      last_value <- raw

      # Drop the consumed component by position rather than by pattern, so
      # values containing regex metacharacters are removed literally.
      rest <- substr(payload, nchar(raw) + 1L, nchar(payload))
    }
    fields
  }

  rows <- lapply(epc, parse_one)
  result <- matrix(
    as.character(unlist(rows, use.names = FALSE)),
    ncol = length(leads),
    byrow = TRUE,
    dimnames = list(NULL, names(leads))
  )
  df <- as.data.frame(result, stringsAsFactors = FALSE)
  if (nrow(df) == 0L)
    return(df)
  df[complete.cases(df), , drop = FALSE]
}
# Example driver: load the parser and run it on two sample EPC strings.
# Each parseEPC() call sits at top level, so its result is auto-printed.
# Presumably "epc.R" is the file that defines parseEPC(); confirm the path is
# resolved relative to the working directory this script is run from.
source("epc.R")
# Sample 1: the components appear with the leads "01", "17", "10", "21".
epc1 <- "01061590000001081721012210FG4676RTY21AYBMrds16z0k"
parseEPC(epc1)
# Sample 2: after the leading "01" and its 14 characters, the next two
# characters are "11".
# NOTE(review): confirm the parseEPC() loaded from epc.R recognises that lead;
# if it does not, this call is expected to fail rather than return a data frame.
epc2 <- "0103664798013962112109031724083110V3K321V21136P3HP961F4WE"
parseEPC(epc2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment