Skip to content

Instantly share code, notes, and snippets.

@tomncooper
Created March 8, 2018 14:16
Show Gist options
  • Save tomncooper/22d94052449de164abc9cb9f26f454c5 to your computer and use it in GitHub Desktop.
Save tomncooper/22d94052449de164abc9cb9f26f454c5 to your computer and use it in GitHub Desktop.
Toy example of using agrep to match food groups for participant food choices
library(tidyr)
# User data dataframe
# Built directly with data.frame() instead of cbind()-ing matrices: combining
# a numeric and a character matrix coerces everything to character, which
# silently turned the participant IDs into strings.
user.foods <- c("milk", "apples", "bread", "ice cream", "boxed fruits")
# One ID per participant food choice (1, 2, ..., n)
ID <- seq_along(user.foods)
user.data <- data.frame(ID = ID, User.Food = user.foods,
                        stringsAsFactors = FALSE)
# Food Groups dataframe
# Each entry of foods is paired by position with its group in foodcodes.
# Built directly with data.frame() (no matrix round-trip) and with
# stringsAsFactors = FALSE so both columns are plain character vectors on
# every R version.
foods <- c("Cream", "Milk", "Yoghurt", "Bread", "Ice cream cones",
           "Orange", "Carrots", "Milk chocolate")
foodcodes <- c("Dairy", "Dairy", "Dairy", "Grains", "Dairy",
               "Fruit and vegetables", "Fruit and vegetables", "Sweets")
food.groups <- data.frame(Food = foods, Food.Code = foodcodes,
                          stringsAsFactors = FALSE)
# Look up a participant's food in the food.groups table.
# Returns the row positions of food.groups whose Food entry approximately
# matches `pattern` (case-insensitive); the result is empty when nothing
# matches.
match.food <- function(pattern) {
  agrep(pattern, food.groups$Food, value = FALSE, ignore.case = TRUE)
}
# Add matched indexes to user data
# lapply() always yields a list (one index vector per participant food).
# sapply() would only do so when the match counts differ; with equal counts
# it simplifies to a vector or matrix and changes the shape of the column.
user.data$matched.indexes <- lapply(user.data$User.Food, match.food)
# Helper for converting the no matches to NA: reports whether an entry of
# matched indexes counts as "no match".
# Returns TRUE when the values in x sum to zero (which includes an empty
# vector) and FALSE otherwise.
check <- function(x) {
  total <- sum(x)
  if (total == 0) TRUE else FALSE
}
# Convert the no matches (entries flagged by check) to NA.
# The column is indexed directly: assigning into a zero-row subset of the
# data frame raises an error when every food has at least one match.
no.match <- vapply(user.data$matched.indexes, check, logical(1))
user.data$matched.indexes[no.match] <- NA
# Convert to long format: one row per (participant food, matched index) pair.
# Each participant row is repeated once per matched index and the index
# vectors are flattened alongside. This keeps the indexes as integers, so no
# string splitting (and no clean-up of stray "c" / "" tokens) is needed.
match.counts <- lengths(user.data$matched.indexes)
other.columns <- setdiff(names(user.data), "matched.indexes")
long.user.data <- user.data[rep(seq_len(nrow(user.data)), match.counts),
                            other.columns, drop = FALSE]
# as.integer() guards the all-NA case, where unlist() would return a logical
# vector that would be recycled as a row mask in the lookup below.
long.user.data$matched.indexes <- as.integer(
  unlist(user.data$matched.indexes, use.names = FALSE)
)
rownames(long.user.data) <- NULL
# Add in the food group data; an NA index yields a row of NAs, so unmatched
# participant foods are kept with empty food group columns.
matched.groups <- food.groups[long.user.data$matched.indexes,
                              c("Food", "Food.Code"), drop = FALSE]
rownames(matched.groups) <- NULL
user.food.matches <- cbind(long.user.data, matched.groups)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment