Skip to content

Instantly share code, notes, and snippets.

@pferrel
Forked from rawkintrevo/Mahout Simple CCO
Created April 5, 2017 17:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pferrel/44b41be4e25bef3142ec046d21d8b7f1 to your computer and use it in GitHub Desktop.
Save pferrel/44b41be4e25bef3142ec046d21d8b7f1 to your computer and use it in GitHub Desktop.
/**
* Created by rawkintrevo on 4/5/17.
*/
// Only need these so intelliJ doesn't complain
import org.apache.mahout.math._
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.math.drm._
import org.apache.mahout.math.scalabindings.RLikeOps._
import org.apache.mahout.math.drm.RLikeDrmOps._
import org.apache.mahout.sparkbindings._
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
// Stand-alone Spark setup: build the configuration, start the context, then wrap it
// for Mahout. `sdc` is declared implicit and is never passed explicitly below —
// presumably the drmParallelize calls pick it up from implicit scope.
val conf = {
  val sparkConf = new SparkConf()
  sparkConf.setAppName("Simple Application")
}
val sc = new SparkContext(conf)
implicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = {
  sc2sdc(sc)
}
// Everything above is already created for you by `./mahout spark-shell`; it is repeated here only to keep IntelliJ happy.
// To launch the shell, don't forget:
//   export SPARK_HOME=$HOME/gits/spark-1.6.2-bin-hadoop2.6
//   ../mahout/bin/mahout spark-shell
import org.apache.mahout.math.indexeddataset.{IndexedDataset, BiDictionary}
import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark
// Row dictionary: each user name is keyed to its position in the list (Andrew = 0 ... Pat = 6).
val userMap = {
  val userNames = List("Andrew", "Sebastian", "Ted", "Sarah", "Alexy", "Isabelle", "Pat")
  userNames.zipWithIndex.toMap
}
val rowIDs = new BiDictionary(userMap)

// Column dictionary: each product name is keyed to its position in the list.
//   0 = iPhone5, 1 = iPhone6, 2 = Galaxy, 3 = Nexus, 4 = iPad, 5 = Surface
val productMap = {
  val productNames = List("iPhone5", "iPhone6", "Galaxy", "Nexus", "iPad", "Surface")
  productNames.zipWithIndex.toMap
}
val colIDs = new BiDictionary(productMap)
// Purchase indicators: one row per user, a single 1 in the column of the product bought
// (column numbers follow productMap: 0 = iPhone5, 2 = Galaxy, 4 = iPad).
// NOTE(review): userMap lists seven users but only six rows are supplied here, so Pat
// (index 6) has no row — confirm this is intended before relying on the row dictionary.
val buyIndicatorMatrix = {
  val iPhone5 = (0, 1) :: Nil
  val galaxy = (2, 1) :: Nil
  val iPad = (4, 1) :: Nil
  sparse(
    iPhone5, // Andrew
    galaxy,  // Sebastian
    iPad,    // Ted
    iPhone5, // Sarah
    galaxy,  // Alexy
    galaxy   // Isabelle
  )
}
// Distribute the in-core matrix, then attach the user (row) and product (column) dictionaries.
val buyIndicatorDRM = drmParallelize(buyIndicatorMatrix)
val buyIndicatorIDS = new IndexedDatasetSpark(
  buyIndicatorDRM,
  rowIDs,
  colIDs)
// View indicators: one row per user, a 1 in every product column that user viewed
// (column numbers follow productMap).
// NOTE(review): the row labels skip Alexy, who is index 4 in userMap, so the rows
// labelled Isabelle and Pat sit at indices 4 and 5 rather than the 5 and 6 that userMap
// assigns them — confirm the intended row alignment.
val viewIndicatorMatrix = {
  // Sparse row description holding a 1 for each listed column.
  def viewed(columns: Int*) = columns.toList.map(column => (column, 1))
  sparse(
    viewed(0, 2, 3),    // Andrew
    viewed(0, 2, 4, 5), // Sebastian
    viewed(1, 4, 5),    // Ted
    viewed(0, 2, 5),    // Sarah
    viewed(2, 5),       // Isabelle
    viewed(0, 2, 4, 5)  // Pat
  )
}
// Distribute the in-core matrix, then attach the same user and product dictionaries.
val viewIndicatorDRM = drmParallelize(viewIndicatorMatrix)
val viewIndicatorIDS = new IndexedDatasetSpark(
  viewIndicatorDRM,
  rowIDs,
  colIDs)
import org.apache.mahout.math.cf.SimilarityAnalysis
// Run the cooccurrence analysis with the buy dataset first and the view dataset second,
// keeping a single item per thing (maxInterestingItemsPerThing = 1) and a fixed seed so
// repeated runs are comparable.
val ccoDRMS = SimilarityAnalysis.cooccurrencesIDSs(
  Array(buyIndicatorIDS, viewIndicatorIDS),
  maxInterestingItemsPerThing = 1,
  randomSeed = 1234)

// Pull both result matrices back in-core. Presumably the first is buy/buy and the
// second buy/view — confirm against the SimilarityAnalysis documentation.
// Original author's note on the first value: THESE ARE MISNAMED LLRS.
val logLikelihoods = ccoDRMS.head.matrix.collect
val invertedScores = ccoDRMS(1).matrix.collect
/**
invertedScores: org.apache.mahout.math.Matrix =
{
0 => {3:2.6341457841558764}
1 => {}
2 => {2:1.5876494966267813}
3 => {}
4 => {1:5.406734506395658}
}
**/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment