Skip to content

Instantly share code, notes, and snippets.

@scepion1d
Created April 13, 2012 07:14
Show Gist options
  • Save scepion1d/2374730 to your computer and use it in GitHub Desktop.
Save scepion1d/2374730 to your computer and use it in GitHub Desktop.
Apriori algorithm implementation.
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.StringTokenizer;
import java.util.Vector;
/**
 * Apriori frequent-itemset mining executed directly against a SQL database.
 *
 * <p>The queries issued by this class reference three tables: {@code bills(id)},
 * {@code facts(bill, product)} and {@code product_dim(id, color, consumer, name)}.
 * Support is expressed as a percentage of the number of rows in {@code bills}.
 *
 * <p>All state is kept in static fields, so a second call to
 * {@link #start(int, double, Statement)} overwrites the results of the first,
 * and concurrent calls would interfere with each other.
 *
 * <p>Product ids read from {@code facts} are concatenated into SQL text unquoted,
 * exactly as stored; ids are therefore expected to be numeric literals.
 */
public class Apriori {
    private static Statement stmt;
    private static Vector<String> candidates;
    private static Vector<Double> supports;
    private static double min_support;
    private static double one_percent;

    /**
     * Mines the frequent itemsets that contain exactly {@code max_prod_number} products.
     *
     * <p>Levels 1..max_prod_number are processed in order; each level keeps only the
     * itemsets whose support reaches {@code min_supp}. After the last level the product
     * ids are replaced by human-readable labels. Results are read back through
     * {@link #get_candidates()} and {@link #get_supports()}.
     *
     * @param max_prod_number size of the itemsets to return; must be at least 1
     * @param min_supp        minimum support, in percent of all bills
     * @param new_statement   open JDBC statement used for every query
     * @throws IllegalArgumentException if {@code new_statement} is null or
     *                                  {@code max_prod_number} is less than 1
     * @throws Exception                if a query fails
     */
    public static void start(int max_prod_number, double min_supp, Statement new_statement) throws Exception {
        if (new_statement == null) {
            throw new IllegalArgumentException("new_statement must not be null");
        }
        if (max_prod_number < 1) {
            throw new IllegalArgumentException("max_prod_number must be >= 1 but was " + max_prod_number);
        }
        stmt = new_statement;
        min_support = min_supp;
        candidates = new Vector<String>();
        supports = new Vector<Double>();
        get_all_transactions();
        if (one_percent <= 0.0) {
            // No bills: every support would be NaN or Infinity, so nothing can be frequent.
            return;
        }
        for (int i = 1; i <= max_prod_number; i++) {
            generate_candidates(i);
            get_frequent_sets();
            if (candidates.isEmpty()) {
                // No frequent itemsets at this level means none at any larger level.
                break;
            }
        }
        prepare_product_data();
    }

    /**
     * Returns the labels of the frequent itemsets found by the last {@code start} call.
     * Each label has one "color consumer name" line per product, separated by ",\n".
     *
     * @return the labels, or {@code null} if {@code start} has never been called
     */
    public static Vector<String> get_candidates() {
        return candidates;
    }

    /**
     * Returns the supports (in percent) matching {@link #get_candidates()} index by index.
     *
     * @return the supports, or {@code null} if {@code start} has never been called
     */
    public static Vector<Double> get_supports() {
        return supports;
    }

    /** Loads the number of bills and stores one percent of it for support computation. */
    private static void get_all_transactions() throws Exception {
        one_percent = query_number("SELECT COUNT(id) FROM bills") / 100.0;
    }

    /** Runs a single-value query and returns that value, or 0 when the query yields no row. */
    private static double query_number(String query) throws Exception {
        ResultSet rs = stmt.executeQuery(query);
        try {
            return rs.next() ? rs.getDouble(1) : 0.0;
        } finally {
            rs.close();
        }
    }

    /** Replaces every itemset of product ids by a label built from {@code product_dim}. */
    private static void prepare_product_data() throws Exception {
        Vector<String> labelled = new Vector<String>(candidates.size());
        for (String candidate : candidates) {
            StringBuilder label = new StringBuilder();
            // A single-product itemset contains no space, so split() yields just that id.
            for (String prod : candidate.split(" ")) {
                ResultSet rs = stmt.executeQuery(
                        "SELECT color, consumer, name FROM product_dim WHERE id=" + prod);
                try {
                    if (!rs.next()) {
                        throw new java.sql.SQLException("No product_dim row for product id " + prod);
                    }
                    if (label.length() > 0) {
                        label.append(",\n");
                    }
                    label.append(rs.getString(1)).append(" ")
                         .append(rs.getString(2)).append(" ")
                         .append(rs.getString(3));
                } finally {
                    rs.close();
                }
            }
            labelled.add(label.toString());
        }
        candidates = labelled;
    }

    /**
     * Builds the candidate itemsets of size {@code count}.
     *
     * <p>Level 1 loads every distinct product id from {@code facts}. Level k (k >= 2)
     * joins each pair of frequent (k-1)-itemsets that agree on their first k-2 products;
     * for k == 2 this is every pair of frequent single products. The subset-pruning step
     * of textbook Apriori is omitted: infrequent candidates are removed by the support
     * check anyway.
     */
    private static void generate_candidates(int count) throws Exception {
        Vector<String> new_candidates = new Vector<String>();
        if (count == 1) {
            ResultSet rs = stmt.executeQuery("SELECT product FROM facts GROUP BY product");
            try {
                while (rs.next()) {
                    String product = rs.getString(1);
                    if (product != null) {
                        // Trim so padded ids cannot break the space-separated itemset encoding.
                        new_candidates.add(product.trim());
                    }
                }
            } finally {
                rs.close();
            }
        } else {
            // Split each itemset once instead of once per pair.
            Vector<String[]> itemsets = new Vector<String[]>(candidates.size());
            for (String candidate : candidates) {
                itemsets.add(candidate.split(" "));
            }
            int prefix_length = count - 2;
            for (int i = 0; i < itemsets.size(); i++) {
                String[] left = itemsets.get(i);
                for (int j = i + 1; j < itemsets.size(); j++) {
                    String[] right = itemsets.get(j);
                    if (shares_prefix(left, right, prefix_length)) {
                        new_candidates.add(candidates.get(i) + " " + right[right.length - 1]);
                    }
                }
            }
        }
        candidates = new_candidates;
    }

    /** Tells whether the first {@code length} products of both itemsets are equal. */
    private static boolean shares_prefix(String[] left, String[] right, int length) {
        if (left.length < length || right.length < length) {
            return false;
        }
        for (int k = 0; k < length; k++) {
            if (!left[k].equals(right[k])) {
                return false;
            }
        }
        return true;
    }

    /** Keeps only the candidates whose support reaches {@code min_support}. */
    private static void get_frequent_sets() throws Exception {
        Vector<String> new_candidates = new Vector<String>();
        Vector<Double> new_supports = new Vector<Double>();
        for (String candidate : candidates) {
            double support = query_number(build_support_query(candidate.split(" "))) / one_percent;
            if (support >= min_support) {
                new_candidates.add(candidate);
                new_supports.add(support);
            }
        }
        candidates = new_candidates;
        supports = new_supports;
    }

    /**
     * Builds the query counting the bills that contain every given product: {@code facts}
     * is joined to itself once per product on the bill column. DISTINCT keeps the count
     * per bill even if {@code facts} holds repeated (bill, product) rows.
     */
    private static String build_support_query(String[] prods) {
        StringBuilder from = new StringBuilder("facts T0");
        StringBuilder where = new StringBuilder();
        for (int i = 1; i < prods.length; i++) {
            from.append(", facts T").append(i);
            where.append("T0.bill=T").append(i).append(".bill and ");
        }
        for (int i = 0; i < prods.length; i++) {
            if (i > 0) {
                where.append(" and ");
            }
            where.append("T").append(i).append(".product=").append(prods[i]);
        }
        return "SELECT COUNT(DISTINCT T0.bill) FROM " + from + " WHERE " + where;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment