Skip to content

Instantly share code, notes, and snippets.

@frankscholten
Created September 12, 2014 13:44
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save frankscholten/d373c575ad721dd0204e to your computer and use it in GitHub Desktop.
Save frankscholten/d373c575ad721dd0204e to your computer and use it in GitHub Desktop.
Spark ItemSimilarity Java
package sparkexample;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import org.apache.mahout.drivers.DefaultElementReadSchema;
import org.apache.mahout.drivers.Schema;
import org.apache.mahout.drivers.TextDelimitedIndexedDatasetReader;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.MatrixSlice;
import org.apache.mahout.math.cf.SimilarityAnalysis;
import org.apache.mahout.math.drm.DistributedContext;
import org.apache.mahout.math.drm.DrmLike;
import org.apache.mahout.sparkbindings.SparkDistributedContext;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import scala.collection.JavaConverters;
import java.util.List;
/**
 * Mahout Recommender which performs the following steps:
 *
 * <ol>
 *   <li>Configures Spark (local master, Kryo serialization with the Mahout registrator)</li>
 *   <li>Reads user / item data from {@code data/article_views.txt}</li>
 *   <li>Runs cooccurrence analysis</li>
 *   <li>Prints every row of the first resulting indicator matrix to standard output</li>
 *   <li>TODO: Writes the results into ElasticSearch</li>
 * </ol>
 */
public class MahoutRecommenderMain {

    /**
     * Runs the whole pipeline once and always stops the Spark context before returning.
     *
     * @param args command line arguments; not used
     */
    public static void main(String[] args) {
        // Configure Spark
        SparkConf config = new SparkConf();
        config.setMaster("local");
        config.setAppName("Mahout Recommender");
        // Mahout vectors and matrices (e.g. DenseVector) do not implement java.io.Serializable,
        // so Spark's default Java serializer fails with NotSerializableException. Use Kryo with
        // Mahout's registrator, which is what Mahout's own context factory configures.
        config.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        config.set("spark.kryo.registrator", "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator");

        SparkContext sparkContext = new SparkContext(config);
        try {
            DistributedContext distributedContext = new SparkDistributedContext(sparkContext);

            // Read user / item data
            Schema schema = new DefaultElementReadSchema();
            TextDelimitedIndexedDatasetReader reader =
                    new TextDelimitedIndexedDatasetReader(schema, distributedContext);
            DrmLike<Object> drmA = reader
                    .readElementsFrom("data/article_views.txt", HashBiMap.<String, Object>create())
                    .matrix();

            // Run cooccurrence analysis.
            // NOTE(review): the arguments after drmA are presumably the random seed, the two
            // down-sampling limits (disabled here via Integer.MAX_VALUE) and the optional
            // cross-cooccurrence inputs (none) - confirm against the Mahout version in use.
            scala.collection.immutable.List<DrmLike<Object>> cooccurrences =
                    SimilarityAnalysis.cooccurrences(drmA, 0xdeadbeef, Integer.MAX_VALUE, Integer.MAX_VALUE, null);
            List<DrmLike<Object>> indicatorMatrices = JavaConverters.asJavaListConverter(cooccurrences).asJava();

            // Only the first returned matrix is used; it is collected into a local Matrix.
            DrmLike<Object> indicatorMatrix = indicatorMatrices.get(0);
            Matrix matrix = indicatorMatrix.checkpoint(null).collect();

            // Print results
            for (MatrixSlice matrixSlice : matrix) {
                System.out.println(matrixSlice);
            }
        } finally {
            // Release Spark resources even when reading or the analysis fails.
            sparkContext.stop();
        }
    }
}
Copy link

ghost commented Nov 16, 2015

I am getting the same error as @amaebi:
NotSerializableException: org.apache.mahout.math.DenseVector

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment