# mrecos/DF string split and concatenate.r

## DF string split and concatenate.r
#' Translate codes into descriptions using a lookup table
#'
#' NOTE(review): this file defines `lut_match` a second time further down;
#' when the file is sourced the later definition is the one in effect.
#'
#' @param LUT Lookup table (data frame or matrix). Column 1 holds the codes.
#' @param code_field Vector of codes to translate. Empty strings are treated
#'   as missing. Factors are converted to character first.
#' @param desc_col Index or name of the `LUT` column holding the descriptions
#'   (previously ignored; column 2 was always used).
#' @param is_NA Value substituted for missing (`NA` or `""`) codes.
#' @param no_code Accepted for backward compatibility. Codes that are not
#'   found in `LUT` are returned unchanged, so this value is not used.
#' @return A vector the same length as `code_field`: the description for
#'   every code found in `LUT` (first match wins), `is_NA` for missing codes,
#'   and the original code otherwise.
lut_match <- function(LUT, code_field, desc_col = 2, is_NA = "NA", no_code = "N/A") {
  # Pull one LUT column as a plain vector; factors become character so that
  # descriptions are never replaced by their integer level codes.
  lut_column <- function(j) {
    values <- if (is.data.frame(LUT)) LUT[[j]] else LUT[, j]
    if (is.factor(values)) as.character(values) else values
  }

  if (is.factor(code_field)) {
    code_field <- as.character(code_field)
  }
  code_field[which(code_field == "")] <- NA
  missing_code <- is.na(code_field)

  # One vectorised lookup instead of a loop over every unique code.
  hit <- match(code_field, lut_column(1))
  found <- !missing_code & !is.na(hit)

  coded <- code_field
  # Guarded assignments: an empty assignment would still coerce the type of
  # `coded`, and untouched input must come back with its original type.
  if (any(found)) {
    coded[found] <- lut_column(desc_col)[hit[found]]
  }
  if (any(missing_code)) {
    coded[missing_code] <- is_NA
  }
  coded
}

#' Translate a delimited code column into a delimited description string
#'
#' Splits `code_df[[colname]]` on `delim` with `splitstackshape::cSplit()`,
#' translates every piece with `lut_match()`, and pastes the pieces of each
#' row back together separated by ",".
#'
#' NOTE(review): this file defines `col_cat` a second time further down;
#' when the file is sourced the later definition is the one in effect.
#'
#' @param LUT Lookup table passed on to `lut_match()`.
#' @param code_df Data frame containing the column to translate.
#' @param colname Name of the column holding the delimited codes.
#' @param delim Delimiter separating the codes within a cell.
#' @return Character vector with one element per row of `code_df`. Missing
#'   pieces after the first are dropped; a missing first piece still shows
#'   up as the literal "NA", as it did before.
col_cat <- function(LUT, code_df, colname, delim = ",") {
  if (!requireNamespace("splitstackshape", quietly = TRUE)) {
    stop("col_cat() requires the 'splitstackshape' package", call. = FALSE)
  }

  # as.data.frame() keeps the split columns as they are (no factor
  # conversion, no name mangling).
  split_df <- as.data.frame(
    splitstackshape::cSplit(code_df, colname, delim, drop = FALSE)
  )

  # With drop = FALSE the original columns are kept, so the surplus columns
  # are the new ones. Like the previous code, this assumes they are the
  # trailing columns. Counting them from the result avoids re-splitting
  # with strsplit(), which treated `delim` as a regex and failed on factors.
  n_new <- ncol(split_df) - ncol(code_df)
  new_idx <- seq.int(to = ncol(split_df), length.out = n_new)
  # drop = FALSE: a single split column must stay a data frame.
  pieces <- split_df[, new_idx, drop = FALSE]

  described <- lapply(pieces, function(codes) lut_match(LUT, codes, is_NA = NA))

  # unname() so column names can never be mistaken for paste() arguments.
  code_string <- do.call(paste, c(unname(described), list(sep = ",")))

  # Remove only whole ",NA" tokens; descriptions that merely start with
  # "NA" are left intact.
  gsub(",NA(?=,|$)", "", code_string, perl = TRUE)
}
	#' Translate codes into descriptions using a lookup table
	#'
	#' NOTE(review): this is the second definition of `lut_match` in this file;
	#' it replaces the earlier one when the file is sourced.
	#'
	#' @param LUT Lookup table (data frame or matrix). Column 1 holds the codes.
	#' @param code_field Vector of codes to translate. Empty strings are treated
	#'   as missing. Factors are converted to character first.
	#' @param desc_col Index or name of the `LUT` column holding the descriptions
	#'   (previously ignored; column 2 was always used).
	#' @param is_NA Value substituted for missing (`NA` or `""`) codes.
	#' @param no_code Accepted for backward compatibility. Codes that are not
	#'   found in `LUT` are returned unchanged, so this value is not used.
	#' @return A vector the same length as `code_field`: the description for
	#'   every code found in `LUT` (first match wins), `is_NA` for missing codes,
	#'   and the original code otherwise.
	lut_match <- function(LUT, code_field, desc_col = 2, is_NA = "NA", no_code = "N/A") {
	  # Pull one LUT column as a plain vector; factors become character so that
	  # descriptions are never replaced by their integer level codes.
	  lut_column <- function(j) {
	    values <- if (is.data.frame(LUT)) LUT[[j]] else LUT[, j]
	    if (is.factor(values)) as.character(values) else values
	  }

	  if (is.factor(code_field)) {
	    code_field <- as.character(code_field)
	  }
	  code_field[which(code_field == "")] <- NA
	  missing_code <- is.na(code_field)

	  # One vectorised lookup instead of a loop over every unique code.
	  hit <- match(code_field, lut_column(1))
	  found <- !missing_code & !is.na(hit)

	  coded <- code_field
	  # Guarded assignments: an empty assignment would still coerce the type of
	  # `coded`, and untouched input must come back with its original type.
	  if (any(found)) {
	    coded[found] <- lut_column(desc_col)[hit[found]]
	  }
	  if (any(missing_code)) {
	    coded[missing_code] <- is_NA
	  }
	  coded
	}

	#' Translate a delimited code column into a delimited description string
	#'
	#' Splits `code_df[[colname]]` on `delim` with `splitstackshape::cSplit()`,
	#' translates every piece with `lut_match()`, and pastes the pieces of each
	#' row back together separated by ",".
	#'
	#' NOTE(review): this is the second definition of `col_cat` in this file;
	#' it replaces the earlier one when the file is sourced.
	#'
	#' @param LUT Lookup table passed on to `lut_match()`.
	#' @param code_df Data frame containing the column to translate.
	#' @param colname Name of the column holding the delimited codes.
	#' @param delim Delimiter separating the codes within a cell.
	#' @return Character vector with one element per row of `code_df`. Missing
	#'   pieces after the first are dropped; a missing first piece still shows
	#'   up as the literal "NA", as it did before.
	col_cat <- function(LUT, code_df, colname, delim = ",") {
	  if (!requireNamespace("splitstackshape", quietly = TRUE)) {
	    stop("col_cat() requires the 'splitstackshape' package", call. = FALSE)
	  }

	  # as.data.frame() keeps the split columns as they are (no factor
	  # conversion, no name mangling).
	  split_df <- as.data.frame(
	    splitstackshape::cSplit(code_df, colname, delim, drop = FALSE)
	  )

	  # With drop = FALSE the original columns are kept, so the surplus columns
	  # are the new ones. Like the previous code, this assumes they are the
	  # trailing columns. Counting them from the result avoids re-splitting
	  # with strsplit(), which treated `delim` as a regex and failed on factors.
	  n_new <- ncol(split_df) - ncol(code_df)
	  new_idx <- seq.int(to = ncol(split_df), length.out = n_new)
	  # drop = FALSE: a single split column must stay a data frame.
	  pieces <- split_df[, new_idx, drop = FALSE]

	  described <- lapply(pieces, function(codes) lut_match(LUT, codes, is_NA = NA))

	  # unname() so column names can never be mistaken for paste() arguments.
	  code_string <- do.call(paste, c(unname(described), list(sep = ",")))

	  # Remove only whole ",NA" tokens; descriptions that merely start with
	  # "NA" are left intact.
	  gsub(",NA(?=,|$)", "", code_string, perl = TRUE)
	}