Created
April 13, 2012 07:14
-
-
Save scepion1d/2374730 to your computer and use it in GitHub Desktop.
Apriori algorithm implementation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.sql.ResultSet; | |
import java.sql.Statement; | |
import java.util.StringTokenizer; | |
import java.util.Vector; | |
/**
 * Apriori frequent-itemset mining over a JDBC data source.
 *
 * <p>The class reads three tables through the {@link Statement} handed to
 * {@link #start}: {@code bills} (one row per transaction), {@code facts}
 * (columns {@code bill} and {@code product}) and {@code product_dim}
 * (columns {@code id}, {@code color}, {@code consumer}, {@code name}).
 *
 * <p>All state lives in static fields that {@link #start} overwrites, so
 * overlapping calls to {@code start} would clobber each other's results.
 *
 * <p>Queries are assembled by string concatenation from product ids that were
 * themselves read from {@code facts.product}; the ids are inserted unquoted.
 */
public class Apriori {
    /** Statement used for every query of the current run. */
    private static Statement stmt;
    /** Item sets of the current level; after {@link #start} the readable product descriptions. */
    private static Vector<String> candidates;
    /** Support in percent, index-aligned with {@link #candidates}. */
    private static Vector<Double> supports;
    /** Minimum support (percent of all bills) a set needs in order to be kept. */
    private static double min_support;
    /** Number of bills that corresponds to one percent of all bills. */
    private static double one_percent;

    /**
     * Mines the item sets of exactly {@code max_prod_number} products whose support is at
     * least {@code min_supp} percent and stores them for {@link #get_candidates()} and
     * {@link #get_supports()}.
     *
     * @param max_prod_number number of products per resulting item set; must be at least 1
     * @param min_supp        minimum support in percent of all bills
     * @param new_statement   statement used to query {@code bills}, {@code facts} and {@code product_dim}
     * @throws IllegalArgumentException if {@code max_prod_number} is smaller than 1
     * @throws Exception                if a query fails or returns no row where one is required
     */
    public static void start(int max_prod_number, double min_supp, Statement new_statement) throws Exception {
        if (max_prod_number < 1) {
            throw new IllegalArgumentException("max_prod_number must be >= 1, got " + max_prod_number);
        }
        stmt = new_statement;
        min_support = min_supp;
        candidates = null;
        supports = null;
        get_all_transactions();
        for (int i = 1; i <= max_prod_number; i++) {
            generate_candidates(i);
            get_frequent_sets(i);
            if (candidates.isEmpty()) {
                // Every larger set would need a frequent subset of this size, so nothing more can be found.
                break;
            }
        }
        prepare_product_data(2);
    }

    /**
     * Returns the item sets found by the last {@link #start} call, one string per set with
     * one "color consumer name" entry per product, entries separated by {@code ",\n"}.
     *
     * @return the internal result vector, or {@code null} if no run has produced one yet
     */
    public static Vector<String> get_candidates() {
        return candidates;
    }

    /**
     * Returns the support (percent of all bills) of each item set, index-aligned with
     * {@link #get_candidates()}.
     *
     * @return the internal support vector, or {@code null} if no run has produced one yet
     */
    public static Vector<Double> get_supports() {
        return supports;
    }

    /** Reads the number of bills and derives how many bills make up one percent. */
    private static void get_all_transactions() throws Exception {
        one_percent = query_number("SELECT COUNT(id) FROM bills") / 100.0;
    }

    /**
     * Replaces every product id in {@link #candidates} by its "color consumer name"
     * description from {@code product_dim}.
     *
     * @param count when greater than 1 each candidate is split on spaces into product ids,
     *              otherwise the candidate is treated as a single id
     */
    private static void prepare_product_data(int count) throws Exception {
        Vector<String> new_candidates = new Vector<String>();
        for (String candidate : candidates) {
            String[] prods = count > 1 ? candidate.split(" ") : new String[] {candidate};
            StringBuilder description = new StringBuilder();
            for (String prod : prods) {
                String query = "SELECT color, consumer, name FROM product_dim WHERE id=" + prod;
                ResultSet rs = stmt.executeQuery(query);
                try {
                    if (!rs.next()) {
                        throw new IllegalStateException("No product_dim row for id " + prod);
                    }
                    if (description.length() > 0) {
                        description.append(",\n");
                    }
                    description.append(rs.getString(1)).append(" ")
                            .append(rs.getString(2)).append(" ")
                            .append(rs.getString(3));
                } finally {
                    rs.close();
                }
            }
            new_candidates.add(description.toString());
        }
        candidates = new_candidates;
    }

    /**
     * Builds the candidate item sets of size {@code count}.
     *
     * <p>Size 1 is loaded from the distinct products in {@code facts}. Larger sizes are
     * produced by joining two frequent sets of size {@code count - 1} that agree on all
     * but their last product; for size 2 this yields every pair of frequent products.
     */
    private static void generate_candidates(int count) throws Exception {
        Vector<String> new_candidates = new Vector<String>();
        if (count == 1) {
            ResultSet rs = stmt.executeQuery("SELECT product FROM facts GROUP BY product");
            try {
                while (rs.next()) {
                    new_candidates.add(rs.getString(1));
                }
            } finally {
                rs.close();
            }
        } else if (count >= 2) {
            int size = candidates.size();
            for (int i = 0; i < size; i++) {
                String first = candidates.get(i);
                String prefix = prefix_of(first);
                for (int j = i + 1; j < size; j++) {
                    String second = candidates.get(j);
                    if (prefix.equals(prefix_of(second))) {
                        new_candidates.add(first + " " + last_item(second));
                    }
                }
            }
        }
        candidates = new_candidates;
    }

    /**
     * Keeps only the candidates whose support reaches {@link #min_support} and records
     * their support values.
     *
     * @param count number of products per candidate at the current level
     */
    private static void get_frequent_sets(int count) throws Exception {
        Vector<String> new_candidates = new Vector<String>();
        Vector<Double> new_supports = new Vector<Double>();
        for (String candidate : candidates) {
            String[] prods = count == 1 ? new String[] {candidate} : candidate.split(" ");
            double support = query_number(support_query(prods)) / one_percent;
            if (support >= min_support) {
                new_candidates.add(candidate);
                new_supports.add(support);
            }
        }
        candidates = new_candidates;
        supports = new_supports;
    }

    /** Builds the query counting the {@code facts} rows (self-joined on bill) that hold every given product. */
    private static String support_query(String[] prods) {
        if (prods.length == 1) {
            return "SELECT COUNT(bill) FROM facts WHERE product=" + prods[0];
        }
        StringBuilder from = new StringBuilder();
        StringBuilder where = new StringBuilder();
        for (int i = 0; i < prods.length; i++) {
            if (i > 0) {
                from.append(", ");
                where.append(" and T0.bill=T").append(i).append(".bill and ");
            }
            from.append("facts T").append(i);
            where.append("T").append(i).append(".product=").append(prods[i]);
        }
        return "SELECT COUNT(T0.bill) FROM " + from + " WHERE " + where;
    }

    /** Runs a query and returns the first column of its first row as a double, closing the result set. */
    private static double query_number(String query) throws Exception {
        ResultSet rs = stmt.executeQuery(query);
        try {
            if (!rs.next()) {
                throw new IllegalStateException("Query returned no rows: " + query);
            }
            return rs.getDouble(1);
        } finally {
            rs.close();
        }
    }

    /** Returns the item set without its last product, or the empty string for a single product. */
    private static String prefix_of(String itemset) {
        int cut = itemset.lastIndexOf(' ');
        return cut < 0 ? "" : itemset.substring(0, cut);
    }

    /** Returns the last product id of a space-separated item set. */
    private static String last_item(String itemset) {
        return itemset.substring(itemset.lastIndexOf(' ') + 1);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment