Skip to content

Instantly share code, notes, and snippets.

View sanealytics's full-sized avatar

Saurabh Bhatnagar sanealytics

View GitHub Profile
@sanealytics
sanealytics / recommenderlab-1-1.R
Created June 12, 2012 14:49
Recommenderlab walkthrough 1-1
# Packages used throughout this walkthrough
library(recommenderlab) # the recommender framework under evaluation
library(ggplot2)        # plotting

# Load the MovieLense ratings, then show the object's summary line
data("MovieLense")
MovieLense
# 943 x 1664 rating matrix of class ‘realRatingMatrix’ with 99392 ratings.
# Visualizing a sample of this
@sanealytics
sanealytics / recommenderlab-1-2.R
Created June 12, 2012 16:19
Recommenderlab walkthrough 1-2
# Visualizing ratings
# Extract the ratings once and reuse them for both the plot and the summary.
all_ratings <- getRatings(MovieLense)
# ggplot() + geom_histogram() is used instead of the deprecated qplot().
ggplot(data.frame(rating = all_ratings), aes(x = rating)) +
  geom_histogram(binwidth = 1) +
  labs(title = "Histogram of ratings", x = "Rating")
summary(all_ratings) # Skewed to the left: the mean (3.53) is below the median (4)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 1.00 3.00 4.00 3.53 4.00 5.00
@sanealytics
sanealytics / recommenderlab-1-3.R
Created June 12, 2012 16:20
Recommenderlab walkthrough 1-3
# How about after normalization?
# Z-score normalize once and reuse the result for both the plot and the summary.
normalized_ratings <- getRatings(normalize(MovieLense, method = "Z-score"))
# ggplot() + geom_histogram() is used instead of the deprecated qplot().
# bins = 30 is ggplot2's default bin count, stated explicitly so the
# "pick better value" message is not emitted.
ggplot(data.frame(rating = normalized_ratings), aes(x = rating)) +
  geom_histogram(bins = 30) +
  labs(title = "Histogram of normalized ratings", x = "Rating")
summary(normalized_ratings) # seems better
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# -4.8520 -0.6466 0.1084 0.0000 0.7506 4.1280
@sanealytics
sanealytics / recommenderlab-1-4.R
Created June 12, 2012 16:21
Recommenderlab walkthrough 1-4
# How many movies did people rate on average
# rowCounts() yields one count per row (user), so the x axis is the number of
# movies a user rated and the bar heights count users.
# unname() drops the per-user names so data.frame() uses plain row numbers.
ratings_per_user <- unname(rowCounts(MovieLense))
# ggplot() + geom_histogram() is used instead of the deprecated qplot().
ggplot(data.frame(n_rated = ratings_per_user), aes(x = n_rated)) +
  geom_histogram(binwidth = 10) +
  labs(title = "Movies Rated on average",
       x = "# of movies rated",
       y = "# of users")
# Seems people get tired of rating movies at a logarithmic pace. But most rate some.
@sanealytics
sanealytics / recommenderlab-1-5.R
Created June 12, 2012 16:23
Recommenderlab walkthrough 1-5
# What is the mean rating of each movie
# colMeans() yields one mean per column (movie).
# unname() drops the per-movie names so data.frame() uses plain row numbers.
mean_rating_per_movie <- unname(colMeans(MovieLense))
# ggplot() + geom_histogram() is used instead of the deprecated qplot().
ggplot(data.frame(mean_rating = mean_rating_per_movie), aes(x = mean_rating)) +
  geom_histogram(binwidth = 0.1) +
  labs(title = "Mean rating of Movies",
       x = "Rating",
       y = "# of movies")
# The big spike on 1 suggests that this could also be interpreted as binary
# In other words, some people don't want to see certain movies at all.
# Same on 5 and on 3.
# We will give it the binary treatment later
@sanealytics
sanealytics / recommenderlab-1-6.R
Created June 12, 2012 16:24
Recommenderlab walkthrough 1-6
# List the recommender methods registered for the "realRatingMatrix" data type
recommenderRegistry$get_entries(dataType = "realRatingMatrix")
# We have a few options
# Let's check some algorithms against each other
# One "split" run (k = 1) with train = 0.9; given and goodRating are passed
# through to evaluationScheme() unchanged.
scheme <- evaluationScheme(
  MovieLense,
  method = "split",
  train = 0.9,
  k = 1,
  given = 10,
  goodRating = 4
)
scheme
algorithms <- list(
# coding=UTF-8
from __future__ import division
import nltk
from collections import Counter
# This is a simple tool for adding automatic hashtags into an article title
# Created by Shlomi Babluki
# Sep, 2013
runApp(list(
ui = bootstrapPage(
uiOutput("nUI"),
plotOutput('plot'),
submitButton("PlotIt")
),
server = function(input, output, session) {
output$plot <- renderPlot({ hist(runif(input$obs)) })
output$nUI <- renderUI({
@sanealytics
sanealytics / gist:28739e87f8448ecb378f
Created March 10, 2015 07:10
optim set up for RSVD
unroll_Vecs <- function (params, Y, R, num_users, num_movies, num_features) {
# Unrolls vector into X and Theta
# Also calculates difference between prediction and actual
endIdx <- num_movies * num_features
X <- matrix(params[1:endIdx], nrow = num_movies, ncol = num_features)
Theta <- matrix(params[(endIdx + 1): (endIdx + (num_users * num_features))],
nrow = num_users, ncol = num_features)
@sanealytics
sanealytics / recommenderlab-test RSVD
Last active August 29, 2015 14:16
Testing low rank matrix factorization performance
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(recommenderlab) # Install this if you don't have it already
library(devtools) # Install this if you don't have this already
# Get additional recommendation algorithms
# install_github() takes the repository as a single "owner/repo" string; a
# second positional argument would be read as the git ref, not the repo name.
install_github("sanealytics/recommenderlabrats")
data(MovieLense) # Get data
# Divvy it up
scheme <- evaluationScheme(MovieLense, method = "split", train = .9,
                           k = 1, given = 10, goodRating = 4)