Skip to content

Instantly share code, notes, and snippets.

@rawkintrevo
Last active April 7, 2017 01:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rawkintrevo/c1bb00896263bdc067ddcd8299f4794c to your computer and use it in GitHub Desktop.
Save rawkintrevo/c1bb00896263bdc067ddcd8299f4794c to your computer and use it in GitHub Desktop.
/**
* Created by rawkintrevo on 4/5/17.
*/
// These are only needed so IntelliJ doesn't whine
import org.apache.mahout.math._
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.math.drm._
import org.apache.mahout.math.scalabindings.RLikeOps._
import org.apache.mahout.math.drm.RLikeDrmOps._
import org.apache.mahout.sparkbindings._
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
// Stand-alone bootstrap: build a Spark configuration carrying only the application name,
// then start a SparkContext from it.
val conf: SparkConf = new SparkConf().setAppName("Simple Application")
val sc: SparkContext = new SparkContext(conf)
// Wrap the SparkContext as a Mahout distributed context. It is declared implicit so that
// any call taking an implicit SparkDistributedContext can pick it up from scope.
implicit val sdc: SparkDistributedContext = sc2sdc(sc)
// </pandering to intellij>
// Toy interaction log as (user, action, item) triples, distributed as an RDD.
// Purchases come first, then category-browse events, in the order listed below.
val inputRDD = {
  val purchaseEvents = Seq(
    ("u1", "purchase", "iphone"),
    ("u1", "purchase", "ipad"),
    ("u2", "purchase", "nexus"),
    ("u2", "purchase", "galaxy"),
    ("u3", "purchase", "surface"),
    ("u4", "purchase", "iphone"),
    ("u4", "purchase", "galaxy"))

  val browseEvents = Seq(
    ("u1", "category-browse", "phones"),
    ("u1", "category-browse", "electronics"),
    ("u1", "category-browse", "service"),
    ("u2", "category-browse", "accessories"),
    ("u2", "category-browse", "tablets"),
    ("u3", "category-browse", "accessories"),
    ("u3", "category-browse", "service"),
    ("u4", "category-browse", "phones"),
    ("u4", "category-browse", "tablets"))

  sc.parallelize(purchaseEvents ++ browseEvents)
}
import org.apache.mahout.math.indexeddataset.{IndexedDataset, BiDictionary}
import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark
// Build one (user x item) IndexedDataset per action type from the (user, action, item) log.
//
// Purchases are the primary action: their user (row) dictionary is created from the data.
val purchasesIDS = IndexedDatasetSpark.apply(
  inputRDD.filter(_._2 == "purchase").map { case (user, _, item) => (user, item) })(sc)
// The browse dataset reuses the purchase row dictionary instead of building its own.
// Cross-occurrence multiplies the two matrices row-against-row, so row i must denote the
// same user in both; independently built dictionaries do not guarantee that.
// Browse events from users absent from the purchase dictionary are dropped.
val browseIDS = IndexedDatasetSpark.apply(
  inputRDD.filter(_._2 == "category-browse").map { case (user, _, item) => (user, item) },
  Some(purchasesIDS.rowIDs))(sc)
import org.apache.mahout.math.cf.SimilarityAnalysis
// Run the co-occurrence analysis over both action types. purchasesIDS is listed first;
// the first result is what this script calls "AtA" and the second "AtB".
// The seed is fixed at 1234; the two limits are set to 3 and 4 for this toy data set.
val llrDrmList = SimilarityAnalysis.cooccurrencesIDSs(
  Array(purchasesIDS, browseIDS),
  randomSeed = 1234,
  maxInterestingItemsPerThing = 3,
  maxNumInteractions = 4
)
// Pull the first result matrix back to the driver as an in-core matrix.
val llrAtA = llrDrmList.head.matrix.collect
/**
llrAtA: org.apache.mahout.math.Matrix =
{
0 => {4:1.7260924347106847}
1 => {}
2 => {3:1.7260924347106847}
3 => {2:1.7260924347106847}
4 => {0:1.7260924347106847}
}
*/
// Pull the second result (purchase items vs. browsed categories) back to the driver.
val llrAtB = {
  val crossOccurrence = llrDrmList(1)
  crossOccurrence.matrix.collect
}
/**
llrAtB: org.apache.mahout.math.Matrix =
{
0 => {3:5.545177444479561}
1 => {0:1.7260924347106847,1:1.7260924347106847}
2 => {2:5.545177444479561,4:1.7260924347106847}
3 => {1:1.7260924347106847,2:1.7260924347106847,4:4.498681156950466}
4 => {0:1.7260924347106847,3:1.7260924347106847}
}
**/
// A little Scala-Fu for pretty printing
import org.apache.mahout.math.scalabindings.MahoutCollections._
import collection._
import JavaConversions._
// Pretty-print llrAtA: one line per row with the purchase item's name, followed by one
// "--name : value" line for each non-zero entry in that row.
println("LLR of AtA")
println("I.e. Users tend to convert on product X who also buy product Y- Greater is better")
llrAtA.foreach { row =>
  println(purchasesIDS.columnIDs.inverse(row.index()))
  // Each element is consumed inside the callback; nothing is buffered.
  row.nonZeroes().foreach { entry =>
    val otherItem = purchasesIDS.columnIDs.inverse(entry.index())
    val llr = entry.get()
    println(s"--$otherItem : $llr")
  }
}
/**
galaxy
--nexus : 1.7260924347106847
surface
iphone
--ipad : 1.7260924347106847
ipad
--iphone : 1.7260924347106847
nexus
--galaxy : 1.7260924347106847
*/
// Pretty-print llrAtB: rows are labelled with purchase item names, columns with
// browsed category names; only non-zero entries are listed.
println("LLR of AtB")
llrAtB.foreach { row =>
  println(purchasesIDS.columnIDs.inverse(row.index()))
  // Each element is consumed inside the callback; nothing is buffered.
  row.nonZeroes().foreach { entry =>
    val category = browseIDS.columnIDs.inverse(entry.index())
    val llr = entry.get()
    println(s"--$category : $llr")
  }
}
/**
iphone
--phones : 5.545177444479561
--electronics : 1.7260924347106847
ipad
--phones : 1.7260924347106847
--electronics : 4.498681156950466
--service : 1.7260924347106847
nexus
--accessories : 1.7260924347106847
--tablets : 1.7260924347106847
galaxy
--tablets : 5.545177444479561
surface
--accessories : 1.7260924347106847
--service : 1.7260924347106847
*/
/**
Consider an anonymous user who has browsed phones, electronics, and service
**/
// Inspect the category dictionary (name -> column index) in the shell.
browseIDS.columnIDs
// res41: org.apache.mahout.math.indexeddataset.BiDictionary = Map(tablets -> 3, service -> 1, phones -> 2, electronics -> 4, accessories -> 0)

// Browse history of the anonymous user: a sparse vector with a 1 at the column index of
// each browsed category, sized to the number of known categories.
val anonBrowserHxVec = svec(
  List("phones", "electronics", "service").map(category => (browseIDS.columnIDs(category), 1)),
  cardinality = browseIDS.columnIDs.size)

// Purchase history of the same user, built the same way over the purchase item dictionary.
val anonPurchaseHxVec = svec(
  List("iphone", "ipad").map(product => (purchasesIDS.columnIDs(product), 1)),
  cardinality = purchasesIDS.columnIDs.size)
// Score every purchase item for the anonymous user: the purchase history projected through
// llrAtA plus the browse history projected through llrAtB.
val anonRecsVec = (llrAtA %*% anonPurchaseHxVec) + (llrAtB %*% anonBrowserHxVec)
// Print "name : score" for every item with a non-zero score.
anonRecsVec.nonZeroes().foreach { scored =>
  val itemName = purchasesIDS.columnIDs.inverse(scored.index())
  val score = scored.get()
  println(s"$itemName : $score")
}
/**
surface : 1.7260924347106847
iphone : 8.99736231390093
ipad : 9.67695846108252
*/
import org.apache.mahout.math.scalabindings.MahoutCollections._
// Print recommendations, leaving out items the user has already purchased.
// Indices present in the purchase history vector are the already-owned items.
val alreadyPurchased = anonPurchaseHxVec.toMap.keys.toSet
// Iterate the (index -> score) entries directly, so the score comes from the same map
// entry as the index rather than from a second lookup on the vector.
for ((item, score) <- anonRecsVec.toMap if !alreadyPurchased(item)) {
  println(s"${purchasesIDS.columnIDs.inverse(item)} : $score")
}
/**
surface : 1.7260924347106847
**/
@rawkintrevo
Copy link
Author

updated- thx

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment