kendravant/free text manipulation

## free text manipulation
library(stringr)

# Collapse every run of whitespace in NOTE into a single space.
# The regex class \s already matches newlines (\n), carriage returns (\r)
# and tabs, so a single pass does everything that separate "\\n" and "\\r"
# replacements followed by a squeeze of repeated spaces would do.
df$NOTE <- str_replace_all(df$NOTE, "\\s+", " ")

# Strip a literal substring. Wrapping the pattern in fixed() matches it as
# plain text instead of as a regular expression, which is faster.
# NOTE(review): this reads STRIPPED_NOTE, so that column must already
# exist at this point - confirm it is created earlier (e.g. from NOTE).
df$STRIPPED_NOTE <- str_replace_all(
  string = df$STRIPPED_NOTE,
  pattern = fixed("Travel,"),
  replacement = " "
)

# Flag which notes contain a literal substring; the text itself is left
# untouched. The result is not stored here - assign it to a column if the
# flag needs to be kept.
str_detect(string = df$NOTE, pattern = fixed("Family's future,"))

# Estimate the number of words in each note by counting runs of word
# characters (\w+). Counting the words themselves, rather than the
# separators between them plus one, keeps the estimate right for one-word
# notes and for notes that start or end with punctuation or a space.
# Empty strings count as 0 and NA notes stay NA.
df$WORD_COUNT <- str_count(df$NOTE, "\\w+")
	library(stringr)

	# NOTE(review): this indented section repeats the statements earlier in
	# the file - confirm whether the duplication is intentional.
	# Collapse every run of whitespace in NOTE into a single space.
	# The regex class \s already matches newlines (\n), carriage returns (\r)
	# and tabs, so a single pass does everything that separate "\\n" and
	# "\\r" replacements followed by a squeeze of repeated spaces would do.
	df$NOTE <- str_replace_all(df$NOTE, "\\s+", " ")

	# Strip a literal substring. Wrapping the pattern in fixed() matches it as
	# plain text instead of as a regular expression, which is faster.
	# NOTE(review): this reads STRIPPED_NOTE, so that column must already
	# exist at this point - confirm it is created earlier (e.g. from NOTE).
	df$STRIPPED_NOTE <- str_replace_all(
		string = df$STRIPPED_NOTE,
		pattern = fixed("Travel,"),
		replacement = " "
	)

	# Flag which notes contain a literal substring; the text itself is left
	# untouched. The result is not stored here - assign it to a column if the
	# flag needs to be kept.
	str_detect(string = df$NOTE, pattern = fixed("Family's future,"))

	# Estimate the number of words in each note by counting runs of word
	# characters (\w+). Counting the words themselves, rather than the
	# separators between them plus one, keeps the estimate right for one-word
	# notes and for notes that start or end with punctuation or a space.
	# Empty strings count as 0 and NA notes stay NA.
	df$WORD_COUNT <- str_count(df$NOTE, "\\w+")