Skip to content

Instantly share code, notes, and snippets.

@pkakelas
Created June 14, 2021 21:27
Show Gist options
  • Save pkakelas/3bb3eebe1f7dc6c6a0f2c48ea7d8c4c6 to your computer and use it in GitHub Desktop.
Save pkakelas/3bb3eebe1f7dc6c6a0f2c48ea7d8c4c6 to your computer and use it in GitHub Desktop.
library("arules")
# Exercise 1 ----
# Read the baskets, turn the item cells into one logical column per product,
# keep a fixed selection of products and flag each basket as low, medium or
# high value.
groceries <- read.csv("./GroceriesInitial.csv", header = TRUE, sep = ",")

# Columns 4 to 35 hold the purchased items, one product name per cell. The
# first three columns are `id`, `basket_value` and `recency_days`.
basket_item_columns <- 4:35
basket_items <- as.matrix(groceries[, basket_item_columns])

# Distinct product names across ALL item columns.
# Fix: the original indexed `groceries[, 4, 35]`; on a data frame the third
# argument of `[` is `drop`, so only column 4 was scanned and every product
# that never appears in the first item slot was lost.
product_names <- unique(as.character(basket_items))
# Baskets with fewer items than columns leave empty (or NA) cells behind;
# those are not products.
product_names <- product_names[!is.na(product_names) & nzchar(product_names)]

# One logical column per product: TRUE when the basket contains the product.
products <- as.data.frame(
  t(apply(basket_items, 1, function(basket) product_names %in% basket))
)
names(products) <- product_names

# Put `id`, `basket_value` and `recency_days` in front of the product flags.
# data.frame() passes the column names through make.names(), which is
# presumably why the products are referred to with dots below.
groceries <- data.frame(groceries[, 1:3], products)

to_keep <- c(
  "id",
  "basket_value",
  "recency_days",
  "citrus.fruit",
  "tropical.fruit",
  "whole.milk",
  "other.vegetables",
  # NOTE(review): after make.names() this column would be "rolls.buns", not
  # "rolls/buns" -- confirm whether it was meant to be kept.
  # "rolls/buns",
  "chocolate",
  "bottled.water",
  "yogurt",
  "sausage",
  "root.vegetables",
  "pastry",
  "soda",
  "cream"
)

# Fail with a readable message instead of "undefined columns selected".
missing_columns <- setdiff(to_keep, names(groceries))
if (length(missing_columns) > 0) {
  stop(
    "Columns not found in the groceries data: ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}

# Filter the columns and keep only the wanted ones.
groceries <- groceries[, to_keep]

# Flag each basket by value: low (<= 2.5), medium (2.5, 6] and high (> 6).
# A missing basket value leaves all three flags FALSE.
value <- groceries$basket_value
has_value <- !is.na(value)
groceries$low_value_basket <- has_value & value <= 2.5
groceries$medium_value_basket <- has_value & value > 2.5 & value <= 6
groceries$high_value_basket <- has_value & value > 6

# Print the size of each group. According to the original author the three
# counts come out roughly equal -- check the output to confirm.
str(sum(groceries$low_value_basket))
str(sum(groceries$medium_value_basket))
str(sum(groceries$high_value_basket))
# Exercise 2 ----
# Mine association rules twice: on the product flags alone, and on the product
# flags together with the basket-value flags. Both rule sets are sorted by
# lift.

# Columns of `groceries` that are not product flags.
meta_columns <- c("id", "basket_value", "recency_days")
value_columns <- c(
  "low_value_basket",
  "medium_value_basket",
  "high_value_basket"
)
product_columns <- setdiff(names(groceries), c(meta_columns, value_columns))

#' Run apriori with the settings shared by both parts of the exercise.
#'
#' @param data A data frame whose columns are the items to mine.
#' @return The rules found by `apriori()`: 2 to 3 items per rule, support of
#'   at least 0.01 and confidence of at least 0.5.
mine_rules <- function(data) {
  apriori(
    data,
    parameter = list(minlen = 2, maxlen = 3, supp = 0.01, conf = 0.5),
    control = list(verbose = FALSE)
  )
}

# Part 1: products only.
only_items <- groceries[, product_columns, drop = FALSE]
rules <- mine_rules(only_items)
rules_sorted_only_items <- sort(rules, by = "lift")
# inspect(rules_sorted_only_items)

# Part 2: products plus the basket-value flags.
# Fix: the original passed every column of `groceries`, including the raw
# `id`, `basket_value` and `recency_days`. Those are not logical item flags,
# so they either make the conversion to transactions fail or get binned
# automatically, turning a row identifier into an "item". `basket_value` is
# already represented by the three value flags.
# NOTE(review): `recency_days` is left out as well because nothing in this
# script discretizes it; add binned flags if it should take part in the rules.
items_and_value <- groceries[
  , c(product_columns, value_columns),
  drop = FALSE
]
rules <- mine_rules(items_and_value)
rules_sorted_all <- sort(rules, by = "lift")
# inspect(rules_sorted_all)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment