## BroVic/epc.R

## epc.R
# Parses EPC strings into their components.
#
# Each string is read left to right as a sequence of parts, every part being
# a 2-character lead followed by its value:
#   "01" - gtin   (the next 14 characters)
#   "17" - date   (the next 6 characters, read as yymmdd, returned yyyy/mm/dd)
#   "10" - batch  (everything up to the first "21", else the rest of the string)
#   "21" - serial (the rest of the string, converted to upper case)
#
# Params: epc - A character vector of EPC strings.
# Return value: A data frame with the character columns `gtin`, `date`,
#   `batch` and `serial` and one row per string. Rows in which a component is
#   missing (e.g. a date that cannot be parsed) are dropped, and a zero-length
#   input gives a data frame with zero rows.
# Errors: Stops if `epc` is not a character vector. Stops with
#   "Malformed EPC string" (preceded by a warning that says where parsing got
#   to) when what is left of a string does not start with a known lead.
parseEPC <- function(epc) {
  if (!is.character(epc))
    stop("'epc' should be a character vector")

  leads <- c(gtin = "01", date = "17", batch = "10", serial = "21")
  leadnames <- names(leads)
  ncols <- length(leads)

  ## Splits a single EPC string into a named character vector holding one
  ## element per lead. The string is consumed from the left: once a part has
  ## been read, its lead and value are cut off by position (not by regex, so
  ## values containing characters such as "+" or "(" are handled literally).
  parse_one <- function(str) {
    originalstr <- str
    lastValue <- NA_character_
    parts <- structure(rep(NA_character_, ncols), names = leadnames)

    for (i in seq_len(ncols)) {
      lead <- substr(str, 1L, 2L)
      leadindex <- match(lead, leads)

      if (is.na(leadindex)) {
        warning(
          "The last value extracted from ",
          sQuote(originalstr),
          " was ",
          sQuote(lastValue),
          " and the current lead digits are ",
          sQuote(lead),
          call. = FALSE
        )
        stop("Malformed EPC string", call. = FALSE)
      }

      leadname <- leadnames[leadindex]
      rest <- substring(str, nchar(lead) + 1L)

      ## Number of characters that belong to this part
      no.chars <- switch(
        leadname,
        gtin = 14L,
        date = 6L,
        batch = {
          ## Only the first "21" is taken as the start of the serial part;
          ## any later occurrence is left inside the serial value.
          serial.pos <- as.integer(regexpr("21", rest, fixed = TRUE))
          if (serial.pos > 0L) serial.pos - 1L else nchar(rest)
        },
        serial = nchar(rest)
      )
      comp <- substr(rest, 1L, no.chars)

      parts[leadname] <- switch(
        leadname,
        date = comp |>
          as.Date(format = "%y%m%d") |>
          format("%Y/%m/%d"),
        serial = toupper(comp),
        comp
      )

      lastValue <- comp
      str <- substring(rest, nchar(comp) + 1L)
    }

    parts
  }

  ## One pre-allocated row per input string; cells stay NA until filled.
  result <- matrix(
    NA_character_,
    nrow = length(epc),
    ncol = ncols,
    dimnames = list(NULL, leadnames)
  )
  for (i in seq_along(epc))
    result[i, ] <- parse_one(epc[[i]])

  ## Convert to a data frame, keeping only the rows that are fully populated.
  df <- as.data.frame(result)
  df[rowSums(is.na(result)) == 0, , drop = FALSE]
}

## main.R
# Load the definition of parseEPC(). The path is relative, so epc.R is
# looked up in the current working directory.
source("epc.R")

# Example made up of all four parts in order: "01" + 14-character GTIN,
# "17" + 6-character date, "10" + batch, "21" + serial.
epc1 <- "01061590000001081721012210FG4676RTY21AYBMrds16z0k"
parseEPC(epc1)


# NOTE(review): after the GTIN this string continues with "11", which is not
# one of the leads parseEPC() looks for ("01", "17", "10", "21"), so this call
# is expected to stop with "Malformed EPC string" - confirm that is intended.
epc2 <- "0103664798013962112109031724083110V3K321V21136P3HP961F4WE"
parseEPC(epc2)
	# Parses EPC strings into their components.
	#
	# Each string is read left to right as a sequence of parts, every part being
	# a 2-character lead followed by its value:
	#   "01" - gtin   (the next 14 characters)
	#   "17" - date   (the next 6 characters, read as yymmdd, returned yyyy/mm/dd)
	#   "10" - batch  (everything up to the first "21", else the rest of the string)
	#   "21" - serial (the rest of the string, converted to upper case)
	#
	# Params: epc - A character vector of EPC strings.
	# Return value: A data frame with the character columns `gtin`, `date`,
	#   `batch` and `serial` and one row per string. Rows in which a component is
	#   missing (e.g. a date that cannot be parsed) are dropped, and a zero-length
	#   input gives a data frame with zero rows.
	# Errors: Stops if `epc` is not a character vector. Stops with
	#   "Malformed EPC string" (preceded by a warning that says where parsing got
	#   to) when what is left of a string does not start with a known lead.
	parseEPC <- function(epc) {
	  if (!is.character(epc))
	    stop("'epc' should be a character vector")

	  leads <- c(gtin = "01", date = "17", batch = "10", serial = "21")
	  leadnames <- names(leads)
	  ncols <- length(leads)

	  ## Splits a single EPC string into a named character vector holding one
	  ## element per lead. The string is consumed from the left: once a part has
	  ## been read, its lead and value are cut off by position (not by regex, so
	  ## values containing characters such as "+" or "(" are handled literally).
	  parse_one <- function(str) {
	    originalstr <- str
	    lastValue <- NA_character_
	    parts <- structure(rep(NA_character_, ncols), names = leadnames)

	    for (i in seq_len(ncols)) {
	      lead <- substr(str, 1L, 2L)
	      leadindex <- match(lead, leads)

	      if (is.na(leadindex)) {
	        warning(
	          "The last value extracted from ",
	          sQuote(originalstr),
	          " was ",
	          sQuote(lastValue),
	          " and the current lead digits are ",
	          sQuote(lead),
	          call. = FALSE
	        )
	        stop("Malformed EPC string", call. = FALSE)
	      }

	      leadname <- leadnames[leadindex]
	      rest <- substring(str, nchar(lead) + 1L)

	      ## Number of characters that belong to this part
	      no.chars <- switch(
	        leadname,
	        gtin = 14L,
	        date = 6L,
	        batch = {
	          ## Only the first "21" is taken as the start of the serial part;
	          ## any later occurrence is left inside the serial value.
	          serial.pos <- as.integer(regexpr("21", rest, fixed = TRUE))
	          if (serial.pos > 0L) serial.pos - 1L else nchar(rest)
	        },
	        serial = nchar(rest)
	      )
	      comp <- substr(rest, 1L, no.chars)

	      parts[leadname] <- switch(
	        leadname,
	        date = comp |>
	          as.Date(format = "%y%m%d") |>
	          format("%Y/%m/%d"),
	        serial = toupper(comp),
	        comp
	      )

	      lastValue <- comp
	      str <- substring(rest, nchar(comp) + 1L)
	    }

	    parts
	  }

	  ## One pre-allocated row per input string; cells stay NA until filled.
	  result <- matrix(
	    NA_character_,
	    nrow = length(epc),
	    ncol = ncols,
	    dimnames = list(NULL, leadnames)
	  )
	  for (i in seq_along(epc))
	    result[i, ] <- parse_one(epc[[i]])

	  ## Convert to a data frame, keeping only the rows that are fully populated.
	  df <- as.data.frame(result)
	  df[rowSums(is.na(result)) == 0, , drop = FALSE]
	}
	# Load the definition of parseEPC(). The path is relative, so epc.R is
	# looked up in the current working directory.
	source("epc.R")

	# Example made up of all four parts in order: "01" + 14-character GTIN,
	# "17" + 6-character date, "10" + batch, "21" + serial.
	epc1 <- "01061590000001081721012210FG4676RTY21AYBMrds16z0k"
	parseEPC(epc1)


	# NOTE(review): after the GTIN this string continues with "11", which is not
	# one of the leads parseEPC() looks for ("01", "17", "10", "21"), so this call
	# is expected to stop with "Malformed EPC string" - confirm that is intended.
	epc2 <- "0103664798013962112109031724083110V3K321V21136P3HP961F4WE"
	parseEPC(epc2)