Skip to content

Instantly share code, notes, and snippets.

@danbri
Created March 23, 2011 13:15
Show Gist options
  • Save danbri/883070 to your computer and use it in GitHub Desktop.
Save danbri/883070 to your computer and use it in GitHub Desktop.
package tv.foaf;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.impl.recommender.svd.Factorizer;
import org.apache.mahout.cf.taste.impl.recommender.svd.SVDRecommender;
import org.apache.mahout.cf.taste.impl.recommender.svd.ALSWRFactorizer;
import org.apache.mahout.cf.taste.impl.recommender.svd.ExpectationMaximizationSVDFactorizer;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.IRStatistics;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import java.util.Iterator;
import java.io.*;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.Hashtable;
import com.hp.hpl.jena.rdf.model.*;
import com.hp.hpl.jena.vocabulary.*;
import com.hp.hpl.jena.query.*;
import java.io.StringReader;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import java.io.StringWriter;
import java.net.URL;
import java.net.URLConnection;
import java.util.Calendar;
// http://www.jarvana.com/jarvana/view/org/apache/mahout/mahout-core/0.4/mahout-core-0.4-javadoc.jar!/org/apache/mahout/cf/taste/similarity/ItemSimilarity.html
/**
*
* This utility reads a Mahout-compatible CSV file consisting of userid, itemid, and ratings for broadcast episodes/items.
*
* @author Dan Brickley <danbri@danbri.org>
*
* It provides a number of Recommender configurations with different components and parameters, to help us find
* strategies that suit the dataset. It was designed for use with a BBC dataset but is otherwise quite general. There
* is also some code for creating a simple dump of similarity measures for each pair of content IDs, using an external dictionary
* of meaningful IDs rather than mahout-internal numeric IDs. In principle (for the BBC 'pid' IDs)
* these can be linked to RDF descriptions and higher-level groups (series, brands etc.), but that is not yet addressed.
*
* There are several similarity measures we can use. Some exploit pref values, some just patterns of correlation. Typically
* they can be applied both to user similarity, and to item similarity.
*
* Some ignore preference values: TanimotoCoefficientSimilarity, LogLikelihoodSimilarity
* AveragingPreferenceInferrer - not very useful. Guesses a load of data values. Ignore.
* EuclideanDistanceSimilarity - requires pref values. Let's try it.
* SpearmanCorrelationSimilarity - slow
* TanimotoCoefficientSimilarity - ignores pref values
* LogLikelihoodSimilarity - ignores pref values
* PearsonCorrelationSimilarity - uses values
*
* ... also todo: what's the deal w/ symmetry? api docs suggest LL is symmetric, but tests below suggest otherwise.
*/
public class NotubeTVSimilarityDemo
{
// CONFIG
//
static String outfile = "beeb/_notube_item_sims_euclid.txt"; // where to dump pid x pid similarity matrix
// static String dictfile = "beeb/e20catalog_2010.tab.txt";
static String dictfile = "beeb/_catalog_pulse2010b.tab.txt"; // maps floats to string identifiers
static float default_eval_ratio = (float) 0.7; // what proportion to show to the recommender vs withhold for testing
static float default_eval_subset = (float) 0.05; // what subset of entire dataset. Usually 0.05 for us. Tried 0.1 made worse. Trying 0.02
//
// END CONFIG settings

/**
 * Entry point. Loads a Mahout CSV data model from the path given in args[0],
 * prints basic counts, then runs the recommender comparison.
 */
public static void main(String[] args) throws IOException, TasteException, SAXException, ParserConfigurationException {
    if (args.length < 1) {
        System.out.println("Usage: NotubeTVSimilarityDemo <path-to-datamodel-csv>");
        return;
    }
    String recsFile = args[0]; // invoke with path to datamodel csv file
    FileDataModel dataModel = null;
    try {
        dataModel = new FileDataModel(new File(recsFile));
        System.out.println("data model:" + dataModel);
        System.out.println("# Item count:" + dataModel.getNumItems());
        System.out.println("# User count:" + dataModel.getNumUsers());
    } catch (Exception e) {
        System.out.println("Bad things in main().: " + e);
        return; // no usable model; nothing sensible to evaluate
    }
    // ll_sim(dataModel); // call this to generate and store similarities
    // ... but first we use recommender evaluation tool to understand data and metrics better
    // System.out.println("Finished computing similarities.");
    compare_recommenders(dataModel);
    System.out.println("Finished comparing recommenders.");
}

/*
 * AverageAbsoluteDifferenceRecommenderEvaluator is the error signal.
 * What this value means depends on the implementation used - here, AverageAbsoluteDifferenceRecommenderEvaluator. A result of 1.0 from this implementation means that, on average, the recommender estimates a preference that deviates from the actual preference by 1.0.
 * A value of 1.0 is not great, on a scale of 1 to 5, but there is so little data here to begin with. Your results may differ as the data set is split randomly, and hence the training and test set may differ with each run.
 *
 * scores were with 0.05 of the data, test/seen ratio 0.7
 * ... but these ratings go 0-10
 * ... for the similarities that ignore pref values (tanimoto) is this still legit?
 */
public static void compare_recommenders(DataModel model) {
    try {
        // item_recommender_0(model); // GenericItemBasedRecommender PearsonCorrelationSimilarity i0: i1: 0.7906439299870653 (falls to 0.8169254935488462 when shown 0.1 instead of 0.05 of data)
        // user_recommender_1(model); // GenericUserBasedRecommender PearsonCorrelationSimilarity NN 2 u1: 1.2964143259771939
        // user_recommender_2(model); // GenericUserBasedRecommender PearsonCorrelationSimilarity NN 3 u2: 1.2477680938188427
        // user_recommender_3(model); // GenericUserBasedRecommender PearsonCorrelationSimilarity NN 4 u3: 1.3427426657044748
        // user_recommender_4(model); // GenericUserBasedRecommender SpearmanCorrelationSimilarity NN 3 (too slow to run)
        // user_recommender_5(model); // GenericUserBasedRecommender TanimotoCoefficientSimilarity NN 3 u5: 1.6029225072541364
        // user_recommender_6(model); // GenericUserBasedRecommender LogLikelihoodSimilarity NN 3 u6: 1.54983062705351
        // user_recommender_7(model); // GenericUserBasedRecommender EuclideanDistanceSimilarity NN 3 u7: 0.9739673405628033 (current best) (falls to 1.1540336843500705 when shown 0.1, to 1.3557942733168595 when shown 0.02; 0.03 gives 1.20927724097539; 0.06 gives 1.0946254967416316;)
        // user_recommender_8(model); // GenericUserBasedRecommender EuclideanDistanceSimilarity NN 4 u8: 1.014927766870341
        // user_recommender_9(model); // GenericUserBasedRecommender EuclideanDistanceSimilarity NN 2 u9: 0.9906367041198499
        // user_recommender_10(model); // GenericUserBasedRecommender EuclideanDistanceSimilarity NN 1 u10: NaN (nonsense query)
        // slope1_recommender_11(model); // SlopeOneRecommender -> java.lang.OutOfMemoryError: Java heap space; needs a db.
        // svd_recommender_12(model); // SVDRecommender ExpectationMaximizationSVDFactorizer 10 10 s12: 1.3631524191879814
        // svd_recommender_13(model); // SVDRecommender ExpectationMaximizationSVDFactorizer 50 20 s13: 1.3859845062294631
        // svd_recommender_14(model); // SVDRecommender ExpectationMaximizationSVDFactorizer 5 20 s14: 1.3200476179509515 1.3547408931368938 (diff runs)
        // svd_recommender_15(model); // SVDRecommender ExpectationMaximizationSVDFactorizer 3 20 s15: 1.3164864783856183 1.323592386983895 1.372974721109248 1.3557093141310406 1.3530836059620939 1.3338645289897844 when data at 0.1 not 0.5 we get 1.3451332992419183
        // item_recommender_16(model); // GenericItemBasedRecommender EuclideanDistanceSimilarity i16: 0.8209523493990665 or when shown 0.1 of the data, falls to 0.8571813876632841
        // item_recommender_17(model); // GenericItemBasedRecommender LogLikelihoodSimilarity i17: 0.8376207797982301
        // item_recommender_18(model); // GenericItemBasedRecommender TanimotoCoefficientSimilarity i18: i18: 0.7916828258049868
    } catch (Exception e) {
        System.out.println("Exception during recommender comparison: " + e);
    }
}

// ---------------------------------------------------------------------------
// Shared evaluation plumbing. Every public *_recommender_N method below is a
// thin, named configuration over these helpers; the public signatures and the
// printed labels are unchanged from the original one-method-per-config code.
// ---------------------------------------------------------------------------

/** Builds a UserSimilarity from the TRAINING model the evaluator hands us
 *  (the similarity must be computed on the training split, not the full model). */
private interface UserSimilarityFactory {
    UserSimilarity build(DataModel m) throws TasteException;
}

/** Item-similarity analogue of {@link UserSimilarityFactory}. */
private interface ItemSimilarityFactory {
    ItemSimilarity build(DataModel m) throws TasteException;
}

private static final UserSimilarityFactory PEARSON_USER = new UserSimilarityFactory() {
    public UserSimilarity build(DataModel m) throws TasteException { return new PearsonCorrelationSimilarity(m); }
};
private static final UserSimilarityFactory SPEARMAN_USER = new UserSimilarityFactory() {
    // Monstrously slow; see note on user_recommender_4.
    public UserSimilarity build(DataModel m) throws TasteException { return new SpearmanCorrelationSimilarity(m); }
};
private static final UserSimilarityFactory TANIMOTO_USER = new UserSimilarityFactory() {
    // TanimotoCoefficientSimilarity ignores pref values
    public UserSimilarity build(DataModel m) throws TasteException { return new TanimotoCoefficientSimilarity(m); }
};
private static final UserSimilarityFactory LOGLIKELIHOOD_USER = new UserSimilarityFactory() {
    // LogLikelihoodSimilarity ignores pref values
    public UserSimilarity build(DataModel m) throws TasteException { return new LogLikelihoodSimilarity(m); }
};
private static final UserSimilarityFactory EUCLIDEAN_USER = new UserSimilarityFactory() {
    public UserSimilarity build(DataModel m) throws TasteException { return new EuclideanDistanceSimilarity(m); }
};

private static final ItemSimilarityFactory PEARSON_ITEM = new ItemSimilarityFactory() {
    public ItemSimilarity build(DataModel m) throws TasteException { return new PearsonCorrelationSimilarity(m); }
};
private static final ItemSimilarityFactory EUCLIDEAN_ITEM = new ItemSimilarityFactory() {
    public ItemSimilarity build(DataModel m) throws TasteException { return new EuclideanDistanceSimilarity(m); }
};
private static final ItemSimilarityFactory LOGLIKELIHOOD_ITEM = new ItemSimilarityFactory() {
    public ItemSimilarity build(DataModel m) throws TasteException { return new LogLikelihoodSimilarity(m); }
};
private static final ItemSimilarityFactory TANIMOTO_ITEM = new ItemSimilarityFactory() {
    public ItemSimilarity build(DataModel m) throws TasteException { return new TanimotoCoefficientSimilarity(m); }
};

/**
 * RecommenderBuilder for a GenericUserBasedRecommender with a NearestN
 * neighbourhood. The similarity is wrapped in CachingUserSimilarity, which
 * memoizes pairwise values only - scores are unchanged, evaluation is faster.
 */
private static RecommenderBuilder userBased(final UserSimilarityFactory factory, final int neighbours) {
    return new RecommenderBuilder() {
        @Override
        public Recommender buildRecommender(DataModel model) throws TasteException {
            UserSimilarity similarity = new CachingUserSimilarity(factory.build(model), model);
            UserNeighborhood neighborhood = new NearestNUserNeighborhood(neighbours, similarity, model);
            return new GenericUserBasedRecommender(model, neighborhood, similarity);
        }
    };
}

/** RecommenderBuilder for a GenericItemBasedRecommender (cached similarity). */
private static RecommenderBuilder itemBased(final ItemSimilarityFactory factory) {
    return new RecommenderBuilder() {
        @Override
        public Recommender buildRecommender(DataModel model) throws TasteException {
            ItemSimilarity similarity = new CachingItemSimilarity(factory.build(model), model);
            return new GenericItemBasedRecommender(model, similarity);
        }
    };
}

/**
 * RecommenderBuilder for SVDRecommender with ExpectationMaximizationSVDFactorizer.
 * numFeatures is the number of latent features the SVD targets (think: how many
 * "genres" taste is condensed into); numIterations is the number of training steps.
 * ALSWRFactorizer(DataModel, int numFeatures, double lambda, int numIterations)
 * is an alternative factorizer, untried here.
 * Note from mahout list: SVDRecommender is really sensitive to the random number
 * seed; AADRE gives about a 20% spread in its evaluations.
 */
private static RecommenderBuilder svdBased(final int numFeatures, final int numIterations) {
    return new RecommenderBuilder() {
        @Override
        public Recommender buildRecommender(DataModel model) throws TasteException {
            // http://search-lucene.com/jd/mahout/core/org/apache/mahout/cf/taste/impl/recommender/svd/Factorizer.html
            Factorizer f = new ExpectationMaximizationSVDFactorizer(model, numFeatures, numIterations);
            return new SVDRecommender(model, f);
        }
    };
}

/**
 * Runs the average-absolute-difference evaluation for one recommender
 * configuration and prints "label: score". RandomUtils.useTestSeed() is
 * applied uniformly so runs are comparable (the original SVD variants 13-15
 * inconsistently omitted it).
 */
private static void runEval(String label, RecommenderBuilder builder, DataModel model) {
    try {
        RandomUtils.useTestSeed();
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
        double score = evaluator.evaluate(builder, null, model, default_eval_ratio, default_eval_subset);
        System.out.println(label + ": " + score + "\n\n");
    } catch (Exception e) {
        System.out.println("Something awful happened. No, really! It went like this: " + e);
    }
}

/* sample full results
 * data model:FileDataModel[dataFile:/Users/danbri/Documents/workspace/mahout-book-examples/beeb/_allpulse2010.csv]
 * # Item count:12777
 * # User count:19500
 * i0: 0.7906439299870622
 * i17: 0.8376207797982301
 * i18: 0.7916828258049868
 */

// GenericItemBasedRecommender, Pearson
public static void item_recommender_0(DataModel model) {
    runEval("i0", itemBased(PEARSON_ITEM), model);
}

// GenericUserBasedRecommender, Pearson, NN 2
public static void user_recommender_1(DataModel model) {
    runEval("u1 results", userBased(PEARSON_USER, 2), model);
}

// GenericUserBasedRecommender, Pearson, NN 3
public static void user_recommender_2(DataModel model) {
    runEval("u2", userBased(PEARSON_USER, 3), model);
}

// GenericUserBasedRecommender, Pearson, NN 4
public static void user_recommender_3(DataModel model) {
    runEval("u3", userBased(PEARSON_USER, 4), model);
}

/*
 * Monstrously slow. For info only.
 * SpearmanCorrelationSimilarity must do non-trivial work to compute and store
 * preference ranks, and is orders of magnitude slower - possibly of academic
 * interest more than practical use. For some small data sets, it may be desirable.
 */
public static void user_recommender_4(DataModel model) {
    runEval("u4", userBased(SPEARMAN_USER, 3), model);
}

// GenericUserBasedRecommender, Tanimoto (ignores pref values), NN 3
public static void user_recommender_5(DataModel model) {
    runEval("u5", userBased(TANIMOTO_USER, 3), model);
}

// GenericUserBasedRecommender, LogLikelihood (ignores pref values), NN 3
public static void user_recommender_6(DataModel model) {
    runEval("u6", userBased(LOGLIKELIHOOD_USER, 3), model);
}

// GenericUserBasedRecommender, Euclidean, NN 3
public static void user_recommender_7(DataModel model) {
    runEval("u7", userBased(EUCLIDEAN_USER, 3), model);
}

// GenericUserBasedRecommender, Euclidean, NN 4
public static void user_recommender_8(DataModel model) {
    runEval("u8", userBased(EUCLIDEAN_USER, 4), model);
}

// GenericUserBasedRecommender, Euclidean, NN 2
public static void user_recommender_9(DataModel model) {
    runEval("u9", userBased(EUCLIDEAN_USER, 2), model);
}

// GenericUserBasedRecommender, Euclidean, NN 1 (nonsense query; gives NaN)
public static void user_recommender_10(DataModel model) {
    runEval("u10", userBased(EUCLIDEAN_USER, 1), model);
}

// SlopeOneRecommender (ran out of heap on this dataset; needs a db-backed diff storage)
public static void slope1_recommender_11(DataModel model) {
    runEval("s11", new RecommenderBuilder() {
        @Override
        public Recommender buildRecommender(DataModel m) throws TasteException {
            return new SlopeOneRecommender(m);
        }
    }, model);
}

// SVDRecommender ExpectationMaximizationSVDFactorizer 10 features, 10 iterations
public static void svd_recommender_12(DataModel model) {
    runEval("s12", svdBased(10, 10), model);
}

// SVDRecommender ExpectationMaximizationSVDFactorizer 50 features, 20 iterations
public static void svd_recommender_13(DataModel model) {
    runEval("s13", svdBased(50, 20), model);
}

// SVDRecommender ExpectationMaximizationSVDFactorizer 5 features, 20 iterations
public static void svd_recommender_14(DataModel model) {
    runEval("s14", svdBased(5, 20), model);
}

// SVDRecommender ExpectationMaximizationSVDFactorizer 3 features, 20 iterations
public static void svd_recommender_15(DataModel model) {
    runEval("s15", svdBased(3, 20), model);
}

// GenericItemBasedRecommender, Euclidean
public static void item_recommender_16(DataModel model) {
    runEval("i16", itemBased(EUCLIDEAN_ITEM), model);
}

// GenericItemBasedRecommender, LogLikelihood
public static void item_recommender_17(DataModel model) {
    runEval("i17", itemBased(LOGLIKELIHOOD_ITEM), model);
}

// GenericItemBasedRecommender, Tanimoto
public static void item_recommender_18(DataModel model) {
    runEval("i18", itemBased(TANIMOTO_ITEM), model);
}

// ************************ Code below generates similarity data dumps; to-do, separate out.

/**
 * Reads the id-to-pid dictionary (tab-separated: mahout id, pid) into a map.
 * The reader is closed even if a line is malformed.
 */
private static Hashtable<String, String> loadDictionary(String path) throws IOException {
    Hashtable<String, String> ix = new Hashtable<String, String>();
    BufferedReader readbuffer = new BufferedReader(new FileReader(path));
    try {
        String strRead;
        while ((strRead = readbuffer.readLine()) != null) {
            String[] cols = strRead.split("\t");
            ix.put(cols[0], cols[1]);
        }
    } finally {
        readbuffer.close();
    }
    return ix;
}

/**
 * Dumps a pid x pid LogLikelihood similarity matrix to {@link #outfile},
 * mapping mahout-internal numeric ids to meaningful pids via {@link #dictfile}.
 * NaN similarities and the i==j diagonal are skipped.
 *
 * Fixed from the original: the progress report used integer division
 * (counter / 100) so it fired on every iteration; now prints every 100 items.
 * Also removed a dead duplicate itemSimilarity(i2, i1) call whose consuming
 * (symmetry-check) code was entirely commented out - it silently doubled the
 * O(n^2) similarity cost.
 */
public static void ll_sim(DataModel dataModel) {
    try {
        ItemSimilarity itemSimilarity = new LogLikelihoodSimilarity(dataModel);
        Hashtable<String, String> ix = loadDictionary(dictfile);
        try {
            BufferedWriter outf = new BufferedWriter(new FileWriter(outfile)); // Create file
            System.out.println("Opening file for writing: " + outfile);
            try {
                int counter = 0;
                LongPrimitiveIterator items1 = dataModel.getItemIDs();
                while (items1.hasNext()) {
                    counter++;
                    if (counter % 100 == 0) {
                        System.out.println("Counter: " + counter);
                    }
                    Long i1 = items1.next();
                    LongPrimitiveIterator items2 = dataModel.getItemIDs();
                    while (items2.hasNext()) {
                        Long i2 = items2.next();
                        if (i1.compareTo(i2) == 0) {
                            System.out.println("skipping i1: " + i1 + " i2: " + i2);
                            continue; // should be identical
                        }
                        double res = itemSimilarity.itemSimilarity(i1.longValue(), i2.longValue());
                        if (Double.isNaN(res)) {
                            continue;
                        }
                        String from_pid = ix.get(i1.toString());
                        String to_pid = ix.get(i2.toString());
                        if (from_pid == null || to_pid == null) {
                            System.out.println("Dictionary of item IDs failed to match: " + i1.toString() + " or " + i2.toString());
                            continue;
                        }
                        outf.write(from_pid + ", " + to_pid + ", " + res + "\n");
                        // here we could consult metadata, but needs a better workflow (eg. local cache)
                        // fetch a string from pid rdf url and : model.read(resultIn, "");
                        // String httpResult = getURLResult("http://www.bbc.co.uk/programmes/"+from_pid+".rdf");
                    }
                }
            } finally {
                outf.close(); // Close the output stream even on failure
                System.out.println("Closed output file. ");
            }
        } catch (Exception e) { // Catch exception if any
            System.err.println("Error: " + e.getMessage());
        }
        System.out.println("Done.");
    } catch (Exception e) {
        System.out.println("Exception: " + e);
    }
}

/**
 * Fetches a URL and returns its body with newlines stripped (lines are
 * concatenated). Returns whatever was read so far ("" on immediate failure);
 * errors are printed, not thrown. The reader is now closed on all paths.
 */
public static String getURLResult(String httpUrl) {
    StringBuffer html = new StringBuffer("");
    BufferedReader in = null;
    try {
        URL url = new URL(httpUrl);
        URLConnection yc = url.openConnection();
        yc.setRequestProperty("Host", url.getHost());
        yc.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322;)");
        in = new BufferedReader(new InputStreamReader(yc.getInputStream()));
        String inputLine;
        while ((inputLine = in.readLine()) != null) {
            html.append(inputLine);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // best-effort close; nothing more to do
            }
        }
    }
    return html.toString();
}
}
/*
// RDF notes:
Model model = ModelFactory.createDefaultModel();
StringReader resultIn = new StringReader("<http://215rg");
model.read(resultIn, "");
http://mahout.apache.org/javadoc/core/org/apache/mahout/cf/taste/impl/common/LongPrimitiveIterator.html
LongPrimitiveIterator getUserIDs()
LongPrimitiveIterator users = dataModel.getUserIDs();
https://hudson.apache.org/hudson/job/Mahout-Quality/javadoc/org/apache/mahout/cf/taste/similarity/ItemSimilarity.html#itemSimilarity(long, long)
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment