# Plaudenslager/cleanup_names.r

## cleanup_names.r
library(dplyr)
library(tidyr)
# sum_trialname contains product names, including three different versions of one product.
# Dropping everything after the first space yields a consistent product name.
# By default, extract() captures the first run of alphanumeric characters and drops everything from the first non-alphanumeric character onward.
# By default, extract() also drops the original column (sum_trialname, in this case); remove = FALSE is passed below to keep it.

# Create a new column with clean, consistent product names
# Add a "Product" column derived from sum_trialname.
#
# extract() is called with its default pattern, which captures the first run
# of alphanumeric characters in each value; version suffixes that follow the
# first space are therefore dropped, collapsing the variants of a product
# into one name. remove = FALSE keeps the original sum_trialname column.
#
# The call is namespace-qualified so that another attached package exporting
# an extract() function cannot mask the tidyr one.
#
# NOTE(review): the default pattern stops at ANY non-alphanumeric character
# (e.g. "-" or "."), not only at a space -- confirm no product name has one.
clean_data <- tidyr::extract(
  clean_data,
  sum_trialname,
  "Product",
  remove = FALSE
)