Skip to content

Instantly share code, notes, and snippets.

@mrecos
Created February 8, 2016 21:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrecos/2f728f7eb7632c72eb51 to your computer and use it in GitHub Desktop.
Save mrecos/2f728f7eb7632c72eb51 to your computer and use it in GitHub Desktop.
Two R functions: 1) recode the values in a vector using a lookup table, with configurable replacements for NA values and for codes that have no match; 2) take a vector of delimiter-concatenated codes, split each element on the delimiter, recode the pieces, and concatenate them back together.
#' Recode a vector of codes using a lookup table
#'
#' Each element of `code_field` is looked up in the first column of `LUT` and
#' replaced by the value found in the `desc_col` column of the matching row.
#' When a code occurs more than once in `LUT`, the first matching row wins.
#'
#' @param LUT Lookup table (data frame or matrix); column 1 holds the codes.
#' @param code_field Vector of codes to recode. Empty strings are treated as
#'   missing.
#' @param desc_col Name or index of the `LUT` column holding the replacement
#'   values.
#' @param is_NA Replacement for missing codes (`NA` or `""`).
#' @param no_code Replacement for non-missing codes that do not occur in
#'   `LUT`.
#' @return A character vector the same length as `code_field`.
lut_match <- function(LUT, code_field, desc_col = 2, is_NA = "NA",
                      no_code = "N/A") {
  lut <- as.data.frame(LUT, stringsAsFactors = FALSE)
  # as.character() keeps a factor description column from collapsing to its
  # integer level codes.
  descriptions <- as.character(lut[[desc_col]])

  is_missing <- is.na(code_field) | code_field == ""

  # match() is vectorised, returns the first hit for each code, and compares
  # factors by label rather than by level code.
  hit <- match(code_field, lut[[1]])
  coded <- descriptions[hit]
  coded[is.na(hit)] <- no_code
  # Missing codes are filled in last so they take precedence over both
  # `no_code` and an NA key that might be present in the lookup table.
  coded[is_missing] <- is_NA
  coded
}
#' Recode delimiter-separated codes and join the results back together
#'
#' Splits every element of `code_df[[colname]]` on `delim` with
#' `splitstackshape::cSplit()`, recodes each piece through `lut_match()`, and
#' pastes the recoded pieces of each row into a single string.
#'
#' @param LUT Lookup table passed on to `lut_match()`.
#' @param code_df Data frame containing the column of concatenated codes.
#' @param colname Name of the column in `code_df` to split and recode.
#' @param delim Delimiter separating the codes within each element.
#' @return A character vector with one element per row of `code_df`. Recoded
#'   pieces are always joined with `","`, whatever `delim` is. A row with no
#'   codes at all yields the literal string `"NA"`.
col_cat <- function(LUT, code_df, colname, delim = ",") {
  if (!requireNamespace("splitstackshape", quietly = TRUE)) {
    stop("Package 'splitstackshape' is required by col_cat().", call. = FALSE)
  }

  n_original <- ncol(code_df)
  code_split <- data.frame(
    splitstackshape::cSplit(code_df, colname, delim, drop = FALSE)
  )
  # With drop = FALSE the split pieces follow the original columns, so
  # everything past the original width is a new column. drop = FALSE on the
  # subset keeps a data frame even when there is only one piece per row.
  code_split <- code_split[, -seq_len(n_original), drop = FALSE]

  recoded <- lapply(code_split, function(piece) {
    # Factor columns cannot take new values, so recode their labels instead.
    if (is.factor(piece)) {
      piece <- as.character(piece)
    }
    as.character(lut_match(LUT, piece, is_NA = NA))
  })
  descriptions <- do.call(cbind, recoded)

  # Join only the non-missing pieces of each row. Stripping ",NA" from a
  # pasted string would also eat descriptions that begin with "NA" and would
  # leave a leading "NA," behind.
  vapply(seq_len(nrow(descriptions)), function(i) {
    pieces <- descriptions[i, ]
    kept <- pieces[!is.na(pieces)]
    if (length(kept) == 0L) {
      "NA"
    } else {
      paste(kept, collapse = ",")
    }
  }, character(1))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment