Skip to content

Instantly share code, notes, and snippets.

Dana Groce danared

  • MongoDB
View GitHub Profile
View mongospark
> db.personal_ratings.find()
{ "_id" : ObjectId("57226a50a45eff77e4dc3fce"), "user_id" : "0", "movie_id" : "1", "rating" : "4" }
{ "_id" : ObjectId("57226a50a45eff77e4dc3fcf"), "user_id" : "0", "movie_id" : "2", "rating" : "4" }
{ "_id" : ObjectId("57226a50a45eff77e4dc3fd0"), "user_id" : "0", "movie_id" : "16", "rating" : "5" }
{ "_id" : ObjectId("57226a50a45eff77e4dc3fd1"), "user_id" : "0", "movie_id" : "19", "rating" : "3" }
{ "_id" : ObjectId("57226a50a45eff77e4dc3fd2"), "user_id" : "0", "movie_id" : "47", "rating" : "4" }
{ "_id" : ObjectId("57226a50a45eff77e4dc3fd3"), "user_id" : "0", "movie_id" : "70", "rating" : "4" }
{ "_id" : ObjectId("57226a50a45eff77e4dc3fd4"), "user_id" : "0", "movie_id" : "163", "rating" : "5" }
{ "_id" : ObjectId("57226a50a45eff77e4dc3fd5"), "user_id" : "0", "movie_id" : "173", "rating" : "1" }
{ "_id" : ObjectId("57226a50a45eff77e4dc3fd6"), "user_id" : "0", "movie_id" : "356", "rating" : "5" }
View mongospark
// Persist the recommendations through the MongoDB Spark connector.
// The DataFrameWriter is put in "overwrite" mode before being handed to MongoSpark.save,
// together with the write configuration defined elsewhere.
MongoSpark.save(
  userRecommendations.write.mode("overwrite"),
  writeConfig
)
View mongospark
// Get user recommendations
import sqlContext.implicits._
// Candidate movies: every distinct movie_id that appears in movieRatings on a row whose
// user_id differs from userId, paired with userId so the model can score each (user, movie).
val unratedMovies = movieRatings
  .filter(s"user_id != $userId")
  .select("movie_id")
  .distinct()
  .map(r => (userId, r.getAs[Int]("movie_id")))
  .toDF("user_id", "movie_id")
// Adds a "prediction" column to each candidate row.
val recommendations = combinedModel.transform(unratedMovies)
// Convert the recommendations into UserMovieRatings.
// The user id comes from userId (previously a hard-coded 0) so the stored rows always belong
// to the same user the candidates were scored for.
// Note: toInt truncates the Float prediction toward zero.
val userRecommendations = recommendations
  .map(r => UserMovieRating(userId, r.getAs[Int]("movie_id"), r.getAs[Float]("prediction").toInt))
  .toDF()
View mongospark
// Combine the datasets: load the "personal_ratings" collection and append it to movieRatings.
val userRatings = MongoSpark.load(sc, readConfig.copy(collectionName = "personal_ratings")).toDF[UserMovieRating]
val combinedRatings = movieRatings.unionAll(userRatings)
// Retrain using the combinedRatings.
// bestModel is the fitted TrainValidationSplitModel, so its own param map only describes the
// tuning wrapper (estimator, evaluator, grid, train ratio) and none of those entries belong to
// `als`. The hyperparameters that won the search live on the estimator copy that produced the
// winning model, i.e. bestModel.bestModel.parent; pass that map so the retrain actually uses them.
val combinedModel = als.fit(combinedRatings, bestModel.bestModel.parent.extractParamMap())
View mongospark
// Run the train/validation search over movieRatings; the fitted result is kept as bestModel.
val bestModel =
  trainedAndValidatedModel.fit(movieRatings)
View mongospark
// Train/validation split around the ALS estimator: 80% of the data is used for training,
// and each candidate in paramGrid is scored by RMSE between "rating" and "prediction".
val trainedAndValidatedModel = {
  val rmseEvaluator = new RegressionEvaluator()
    .setMetricName("rmse")
    .setLabelCol("rating")
    .setPredictionCol("prediction")

  new TrainValidationSplit()
    .setEstimator(als)
    .setEstimatorParamMaps(paramGrid)
    .setEvaluator(rmseEvaluator)
    .setTrainRatio(0.8)
}
View mongospark
// Hyperparameter search space for ALS, assembled with a ParamGridBuilder.
// Every combination of the values below becomes one candidate for TrainValidationSplit:
//   regParam in {0.1, 10.0}, rank in {8, 10}, maxIter in {10, 20}.
val paramGrid = {
  val gridBuilder = new ParamGridBuilder()
  gridBuilder.addGrid(als.regParam, Array(0.1, 10.0))
  gridBuilder.addGrid(als.rank, Array(8, 10))
  gridBuilder.addGrid(als.maxIter, Array(10, 20))
  gridBuilder.build()
}
View mongospark
// ALS estimator wired to the ratings schema: "user_id" identifies the user, "movie_id" the item
// and "rating" the value to learn. The checkpoint interval is set to 2.
val als = new ALS()
  .setUserCol("user_id")
  .setItemCol("movie_id")
  .setRatingCol("rating")
  .setCheckpointInterval(2)
You can’t perform that action at this time.