# rtirrell/gist:770325

## gistfile1.txt
# Profile a naive search for frequent item sets (k = 1 and candidate k = 2)
# in supermarket-type transactions read from 'Data/retail.dat'.

# Item ids whose entry in the table `counts` is strictly above
# `threshold.count`.
frequent.items = function(counts, threshold.count) {
  # Take the ids from the table's names rather than positions in the table:
  # a position equals the id only when the ids happen to be exactly 1..n.
  as.numeric(names(counts)[counts > threshold.count])
}

# Number of transactions that contain every item in `items`.
# (For distinct items this is the same test as
# length(intersect(items, r)) == length(items).)
support.count = function(transactions, items) {
  # vapply() always yields a logical vector, so an empty transaction list
  # sums to 0 instead of failing as sum(sapply(...)) would.
  sum(vapply(transactions, function(r) all(items %in% r), logical(1)))
}

# Start profiling (default output file "Rprof.out"). Profiling stays enabled
# until Rprof(NULL) is called.
Rprof()

# Read a list of about 100K vectors, each with fewer than 30 items
# (most with a few). These are supermarket-type transactions: one per line,
# item ids separated by single spaces.
transactions = lapply(strsplit(readLines('Data/retail.dat'), ' '), as.numeric)
transactions.unlisted = unlist(transactions)

# Count the total number of items over all transactions.
nitems = length(transactions.unlisted)
# And the number of occurrences of each item (names are the item ids).
counts = table(transactions.unlisted)

# The minimum count required for a k-tuple to be considered a frequent
# set of items: 2% of the number of transactions.
threshold.count = 0.02 * length(transactions)

# Set of frequent tuples for k = 1, stored as item ids.
frequent = list(
  single = frequent.items(counts, threshold.count)
)

# All candidate 2-tuples, one pair per row. combn() errors on an empty vector
# and expands a single number n to seq_len(n), so fewer than two frequent
# items must yield an empty candidate matrix explicitly.
if (length(frequent$single) >= 2) {
  frequent$cdouble = t(combn(frequent$single, 2))
} else {
  frequent$cdouble = matrix(numeric(0), nrow = 0, ncol = 2)
}

# Only some of which are truly frequent (occur in more than threshold.count
# transactions); rm.rows[i] == 1 marks candidate row i for removal.
# Only the first 5 candidates are examined (presumably to keep the profiled
# run short -- confirm before relying on rm.rows for the remaining rows);
# the limit is clamped so fewer than 5 candidates cannot index out of bounds.
rm.rows = numeric(nrow(frequent$cdouble))
for (i in seq_len(min(5, nrow(frequent$cdouble)))) {
  # `<=` keeps the rule identical to the k = 1 case: frequent means strictly
  # more than threshold.count.
  if (support.count(transactions, frequent$cdouble[i, ]) <= threshold.count) {
    rm.rows[i] = 1
  }
}
	# Profile a naive search for frequent item sets (k = 1 and candidate k = 2)
	# in supermarket-type transactions read from 'Data/retail.dat'.

	# Item ids whose entry in the table `counts` is strictly above
	# `threshold.count`.
	frequent.items = function(counts, threshold.count) {
	  # Take the ids from the table's names rather than positions in the table:
	  # a position equals the id only when the ids happen to be exactly 1..n.
	  as.numeric(names(counts)[counts > threshold.count])
	}

	# Number of transactions that contain every item in `items`.
	# (For distinct items this is the same test as
	# length(intersect(items, r)) == length(items).)
	support.count = function(transactions, items) {
	  # vapply() always yields a logical vector, so an empty transaction list
	  # sums to 0 instead of failing as sum(sapply(...)) would.
	  sum(vapply(transactions, function(r) all(items %in% r), logical(1)))
	}

	# Start profiling (default output file "Rprof.out"). Profiling stays enabled
	# until Rprof(NULL) is called.
	Rprof()

	# Read a list of about 100K vectors, each with fewer than 30 items
	# (most with a few). These are supermarket-type transactions: one per line,
	# item ids separated by single spaces.
	transactions = lapply(strsplit(readLines('Data/retail.dat'), ' '), as.numeric)
	transactions.unlisted = unlist(transactions)

	# Count the total number of items over all transactions.
	nitems = length(transactions.unlisted)
	# And the number of occurrences of each item (names are the item ids).
	counts = table(transactions.unlisted)

	# The minimum count required for a k-tuple to be considered a frequent
	# set of items: 2% of the number of transactions.
	threshold.count = 0.02 * length(transactions)

	# Set of frequent tuples for k = 1, stored as item ids.
	frequent = list(
	  single = frequent.items(counts, threshold.count)
	)

	# All candidate 2-tuples, one pair per row. combn() errors on an empty vector
	# and expands a single number n to seq_len(n), so fewer than two frequent
	# items must yield an empty candidate matrix explicitly.
	if (length(frequent$single) >= 2) {
	  frequent$cdouble = t(combn(frequent$single, 2))
	} else {
	  frequent$cdouble = matrix(numeric(0), nrow = 0, ncol = 2)
	}

	# Only some of which are truly frequent (occur in more than threshold.count
	# transactions); rm.rows[i] == 1 marks candidate row i for removal.
	# Only the first 5 candidates are examined (presumably to keep the profiled
	# run short -- confirm before relying on rm.rows for the remaining rows);
	# the limit is clamped so fewer than 5 candidates cannot index out of bounds.
	rm.rows = numeric(nrow(frequent$cdouble))
	for (i in seq_len(min(5, nrow(frequent$cdouble)))) {
	  # `<=` keeps the rule identical to the k = 1 case: frequent means strictly
	  # more than threshold.count.
	  if (support.count(transactions, frequent$cdouble[i, ]) <= threshold.count) {
	    rm.rows[i] = 1
	  }
	}