Last active
December 1, 2023 20:21
-
-
Save BroVic/d02cd19f27c1deaa4ce9548dd9fd7c5b to your computer and use it in GitHub Desktop.
Parse a vector of Electronic Product Codes (EPCs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parses a vector of Electronic Product Code (EPC) strings.
#
# Each string is expected to be a concatenation of four components, each
# introduced by a 2-character lead sequence:
#   "01" -> gtin   (fixed width: 14 characters)
#   "17" -> date   (fixed width: 6 characters, read as yymmdd)
#   "10" -> batch  (variable width: runs up to the first "21" that follows,
#                   or to the end of the string when there is none)
#   "21" -> serial (variable width: runs to the end of the string)
#
# Arguments:
#   epc - a character vector of EPC strings.
#
# Value:
#   A data frame of character columns `gtin`, `date` (reformatted as
#   yyyy/mm/dd), `batch` and `serial` (upper-cased), one row per input string.
#   Rows in which any component is NA (e.g. a date that cannot be parsed) are
#   dropped. An empty input yields a data frame with zero rows.
#
# Errors:
#   Stops if `epc` is not a character vector, and stops with
#   "Malformed EPC string" (preceded by a diagnostic warning) as soon as the
#   next 2 characters of a string are not one of the known lead sequences.
parseEPC <- function(epc) {
  if (!is.character(epc))
    stop("'epc' should be a character vector")

  leadnames <- c("gtin", "date", "batch", "serial")
  leads <- structure(c("01", "17", "10", "21"), names = leadnames)
  ncols <- length(leads)
  lead_width <- 2L
  # Components whose length is known in advance; the rest are variable width.
  fixed_width <- c(gtin = 14L, date = 6L)
  serial_lead <- leads[["serial"]]

  # Returns the value that follows the lead sequence at the start of `str`.
  # `leadname` is the name of the component that the lead identifies.
  extract_value <- function(str, leadname) {
    first <- lead_width + 1L
    if (leadname %in% names(fixed_width)) {
      last <- lead_width + fixed_width[[leadname]]
    } else {
      last <- nchar(str)
      if (leadname == "batch") {
        # Only the FIRST occurrence of the serial lead delimits the batch.
        # A literal (fixed) search yields a single position, so the scalar
        # comparison below stays valid even if "21" appears several times.
        serial_pos <- regexpr(serial_lead, str, fixed = TRUE)[1L]
        if (serial_pos > 1L)
          last <- serial_pos - 1L
      }
    }
    substr(str, first, last)
  }

  # Parses a single EPC string into a named character vector of components.
  # With each extracted component the remaining string is shortened, so the
  # next lead sequence is always found in its first 2 characters.
  parse_one <- function(str) {
    row <- structure(rep(NA_character_, ncols), names = leadnames)
    remaining <- str
    last_value <- NA

    for (i in seq_len(ncols)) {
      lead <- substr(remaining, 1L, lead_width)
      leadindex <- match(lead, leads)
      if (is.na(leadindex)) {
        warning(
          "The last value extracted from ",
          sQuote(str),
          " was ",
          sQuote(last_value),
          " and the current lead digits are ",
          sQuote(lead),
          call. = FALSE
        )
        stop("Malformed EPC string", call. = FALSE)
      }

      leadname <- leadnames[leadindex]
      comp <- extract_value(remaining, leadname)
      row[[leadname]] <- switch(
        leadname,
        date = format(as.Date(comp, format = "%y%m%d"), "%Y/%m/%d"),
        serial = toupper(comp),
        comp
      )
      last_value <- comp

      # Drop the lead and the raw value by position rather than by pattern,
      # so that values containing regex metacharacters are handled safely.
      remaining <- substr(
        remaining,
        lead_width + nchar(comp) + 1L,
        nchar(remaining)
      )
    }
    row
  }

  # Preallocate one row per input string; no sentinel row is needed.
  result <- matrix(
    NA_character_,
    nrow = length(epc),
    ncol = ncols,
    dimnames = list(NULL, leadnames)
  )
  for (k in seq_along(epc))
    result[k, ] <- parse_one(epc[[k]])

  df <- as.data.frame(result)
  # Keep only the rows in which every component was obtained.
  df[rowSums(is.na(result)) == 0L, , drop = FALSE]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example usage of parseEPC().
# Loads the file that is presumed to define parseEPC() -- confirm the path.
source("epc.R")

# Carries the four leads parseEPC() knows, in order:
# 01 (gtin), 17 (date), 10 (batch), 21 (serial).
epc1 <- "01061590000001081721012210FG4676RTY21AYBMrds16z0k"
parseEPC(epc1)

# NOTE(review): after the 14-character gtin, the next lead in this string is
# "11", which is not among the leads parseEPC() handles ("01", "17", "10",
# "21"), so this call stops with "Malformed EPC string". Presumably this is
# here to show the failure path -- confirm.
epc2 <- "0103664798013962112109031724083110V3K321V21136P3HP961F4WE"
parseEPC(epc2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment