Skip to content

Instantly share code, notes, and snippets.

@TinoDidriksen
Created September 29, 2016 15:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save TinoDidriksen/7c8f7a2eee178073dcfc41578e9b455d to your computer and use it in GitHub Desktop.
Save TinoDidriksen/7c8f7a2eee178073dcfc41578e9b455d to your computer and use it in GitHub Desktop.
DM555 Assignment 1: Apriori Algorithm
#include <iostream>
#include <set>
#include <vector>
#include <map>
#include <algorithm>
#include <string>
using namespace std; // Wrong, but easier to read in homework
// Helper types
typedef set<char> row_t;
typedef vector<row_t> db_t;
typedef map<row_t, size_t> level_t;
typedef vector<level_t> levels_t;
// Helper function for pretty-printing rows
string to_string(const row_t& row) {
string rv{ "{ " };
for (auto c : row) {
rv += c;
rv += ' ';
}
rv += '}';
return rv;
}
// Helper function for making all combinations
db_t combinations(const row_t& row, size_t c) {
db_t cs;
if (row.size() < c) {
return cs;
}
vector<bool> bitset(row.size() - c, 0);
bitset.resize(row.size(), 1);
do {
row_t nr;
auto it = row.begin();
for (size_t i = 0; i < row.size(); ++i, ++it) {
if (bitset[i]) {
nr.insert(*it);
}
}
cs.push_back(nr);
} while (next_permutation(bitset.begin(), bitset.end()));
return cs;
}
// Helper function to remove candidates below support threshold
level_t remove_infrequent(level_t cands, size_t e) {
for (auto it = cands.begin() ; it != cands.end() ; ) {
if (it->second < e) {
cout << "Discarding " << to_string(it->first) << " for e=" << it->second << endl;
it = cands.erase(it);
}
else {
++it;
}
}
return cands;
}
// Actual worker function
levels_t apriori(const db_t& db, size_t e) {
// Set up 1-itemsets
levels_t levels(1);
for (auto& row : db) {
for (auto c : row) {
++levels[0][row_t{ c }];
}
}
levels[0] = remove_infrequent(levels[0], e);
for (size_t k = 2 ; ; ++k) {
cout << "Level " << k << endl;
// Construct candidates from previous level
level_t cands;
for (auto& a : levels[k - 2]) {
for (auto& b : levels[k - 2]) {
bool good = (a.first != b.first); // Don't even try to union same row
// Check that the first
if (good && k > 2) {
auto end = a.first.begin();
advance(end, k - 2);
good = equal(a.first.begin(), end, b.first.begin());
}
if (good) {
row_t row = a.first;
row.insert(b.first.begin(), b.first.end());
cands[row];
}
}
}
cout << "Candidate sets are:" << endl;
for (auto& row : cands) {
cout << to_string(row.first) << endl;
}
cout << endl;
// Determine which candidates can't possibly be frequent
for (auto cand = cands.begin() ; cand != cands.end() ; ) {
bool good = true;
for (auto& comb : combinations(cand->first, k - 1)) {
if (levels[k - 2].count(comb) == 0) {
cout << "Discarding " << to_string(cand->first) << " for subset " << to_string(comb) << endl;
good = false;
break;
}
}
if (good == false) {
cand = cands.erase(cand);
}
else {
++cand;
}
}
// Determine frequency, then prune infrequent
for (auto& row : db) {
for (auto& cand : cands) {
if (includes(row.begin(), row.end(), cand.first.begin(), cand.first.end())) {
++cand.second;
}
}
}
cands = remove_infrequent(cands, e);
cout << endl;
if (cands.empty()) {
cout << "No candidates remain - ending search" << endl << endl;
break;
}
cout << "Supported sets are:" << endl;
for (auto& row : cands) {
cout << to_string(row.first) << " e=" << row.second << endl;
}
levels.push_back(cands);
cout << endl;
}
cout << "Final supported sets are:" << endl;
for (auto& cands : levels) {
for (auto& row : cands) {
cout << to_string(row.first) << " e=" << row.second << endl;
}
}
return levels;
}
int main() {
cout << boolalpha;
db_t db{
{ 'A', 'B', 'E' },
{ 'B', 'D' },
{ 'C', 'D', 'F' },
{ 'A', 'B', 'D' },
{ 'A', 'C', 'E' },
{ 'B', 'C', 'E', 'F' },
{ 'A', 'C', 'E' },
{ 'A', 'B', 'C', 'E' },
{ 'A', 'B', 'C', 'D', 'F' },
{ 'B', 'C', 'D', 'E' },
};
auto levels = apriori(db, 2);
for (size_t i = 0 ; i < levels.size() ; ++i) {
for (auto& row : levels[i]) {
bool mfi = true;
bool cfi = true;
if (i < levels.size() - 1) {
for (auto& sup : levels[i + 1]) {
if (includes(sup.first.begin(), sup.first.end(), row.first.begin(), row.first.end())) {
mfi = false;
if (sup.second == row.second) {
cfi = false;
}
}
}
}
cout << to_string(row.first) << " is mfi=" << mfi << " and cfi=" << cfi << endl;
}
}
}
Level 2
Candidate sets are:
{ A B }
{ A C }
{ A D }
{ A E }
{ A F }
{ B C }
{ B D }
{ B E }
{ B F }
{ C D }
{ C E }
{ C F }
{ D E }
{ D F }
{ E F }
Discarding { A F } for e=1
Discarding { D E } for e=1
Discarding { E F } for e=1
Supported sets are:
{ A B } e=4
{ A C } e=4
{ A D } e=2
{ A E } e=4
{ B C } e=4
{ B D } e=4
{ B E } e=4
{ B F } e=2
{ C D } e=3
{ C E } e=5
{ C F } e=3
{ D F } e=2
Level 3
Candidate sets are:
{ A B C }
{ A B D }
{ A B E }
{ A C D }
{ A C E }
{ A D E }
{ B C D }
{ B C E }
{ B C F }
{ B D E }
{ B D F }
{ B E F }
{ C D E }
{ C D F }
{ C E F }
Discarding { A D E } for subset { D E }
Discarding { B D E } for subset { D E }
Discarding { B E F } for subset { E F }
Discarding { C D E } for subset { D E }
Discarding { C E F } for subset { E F }
Discarding { A C D } for e=1
Discarding { B D F } for e=1
Supported sets are:
{ A B C } e=2
{ A B D } e=2
{ A B E } e=2
{ A C E } e=3
{ B C D } e=2
{ B C E } e=3
{ B C F } e=2
{ C D F } e=2
Level 4
Candidate sets are:
{ A B C D }
{ A B C E }
{ A B D E }
{ B C D E }
{ B C D F }
{ B C E F }
Discarding { A B C D } for subset { A C D }
Discarding { A B D E } for subset { B D E }
Discarding { B C D E } for subset { C D E }
Discarding { B C D F } for subset { B D F }
Discarding { B C E F } for subset { C E F }
Discarding { A B C E } for e=1
No candidates remain - ending search
Final supported sets are:
{ A } e=6
{ B } e=7
{ C } e=7
{ D } e=5
{ E } e=6
{ F } e=3
{ A B } e=4
{ A C } e=4
{ A D } e=2
{ A E } e=4
{ B C } e=4
{ B D } e=4
{ B E } e=4
{ B F } e=2
{ C D } e=3
{ C E } e=5
{ C F } e=3
{ D F } e=2
{ A B C } e=2
{ A B D } e=2
{ A B E } e=2
{ A C E } e=3
{ B C D } e=2
{ B C E } e=3
{ B C F } e=2
{ C D F } e=2
{ A } is mfi=false and cfi=true
{ B } is mfi=false and cfi=true
{ C } is mfi=false and cfi=true
{ D } is mfi=false and cfi=true
{ E } is mfi=false and cfi=true
{ F } is mfi=false and cfi=false
{ A B } is mfi=false and cfi=true
{ A C } is mfi=false and cfi=true
{ A D } is mfi=false and cfi=false
{ A E } is mfi=false and cfi=true
{ B C } is mfi=false and cfi=true
{ B D } is mfi=false and cfi=true
{ B E } is mfi=false and cfi=true
{ B F } is mfi=false and cfi=false
{ C D } is mfi=false and cfi=true
{ C E } is mfi=false and cfi=true
{ C F } is mfi=false and cfi=true
{ D F } is mfi=false and cfi=false
{ A B C } is mfi=true and cfi=true
{ A B D } is mfi=true and cfi=true
{ A B E } is mfi=true and cfi=true
{ A C E } is mfi=true and cfi=true
{ B C D } is mfi=true and cfi=true
{ B C E } is mfi=true and cfi=true
{ B C F } is mfi=true and cfi=true
{ C D F } is mfi=true and cfi=true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment