Skip to content

Instantly share code, notes, and snippets.

View sanealytics's full-sized avatar

Saurabh Bhatnagar sanealytics

View GitHub Profile
# coding=UTF-8
from __future__ import division
import nltk
from collections import Counter
# This is a simple tool for adding automatic hashtags into an article title
# Created by Shlomi Babluki
# Sep, 2013
@sanealytics
sanealytics / recommenderlab-1-6.R
Created June 12, 2012 16:24
Recommenderlab walkthrough 1-6
# Look up the recommender registry entries for "realRatingMatrix" data
recommenderRegistry$get_entries(dataType = "realRatingMatrix")
# We have a few options
# Let's check some algorithms against each other
# Build an evaluation scheme on MovieLense with method = "split",
# train = .9, k = 1, given = 10 and goodRating = 4.
# NOTE(review): presumably this is a single 90/10 train/test split where 10
# ratings per test user are given to the recommender and ratings >= 4 count
# as good -- confirm against ?evaluationScheme
scheme <- evaluationScheme(MovieLense, method = "split", train = .9,
k = 1, given = 10, goodRating = 4)
# Print the scheme at the top level to inspect it
scheme
algorithms <- list(
@sanealytics
sanealytics / recommenderlab-1-5.R
Created June 12, 2012 16:23
Recommenderlab walkthrough 1-5
# What is the mean rating of each movie
# Plot the column means of MovieLense using a bin width of 0.1
qplot(colMeans(MovieLense), binwidth = .1,
main = "Mean rating of Movies",
xlab = "Rating",
ylab = "# of movies")
# The big spike on 1 suggests that this could also be interpreted as binary
# In other words, some people don't want to see certain movies at all.
# Same on 5 and on 3.
# We will give it the binary treatment later
@sanealytics
sanealytics / recommenderlab-1-4.R
Created June 12, 2012 16:21
Recommenderlab walkthrough 1-4
# How many movies did people rate on average
# rowCounts() gives one count per row (user) of the rating matrix, so the
# plotted variable on the x-axis is the number of movies rated and the
# y-axis is the number of users falling in each bin of width 10.
qplot(rowCounts(MovieLense), binwidth = 10,
main = "Movies Rated on average",
xlab = "# of movies rated",
ylab = "# of users")
# Seems people get tired of rating movies at a logarithmic pace. But most rate some.
@sanealytics
sanealytics / recommenderlab-1-3.R
Created June 12, 2012 16:20
Recommenderlab walkthrough 1-3
# How about after normalization?
# Plot the ratings again after normalizing MovieLense with method "Z-score"
qplot(getRatings(normalize(MovieLense, method = "Z-score")),
main = "Histogram of normalized ratings", xlab = "Rating")
# Numeric summary of the same normalized ratings; the output is pasted below
summary(getRatings(normalize(MovieLense, method = "Z-score"))) # seems better
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# -4.8520 -0.6466 0.1084 0.0000 0.7506 4.1280
@sanealytics
sanealytics / recommenderlab-1-2.R
Created June 12, 2012 16:19
Recommenderlab walkthrough 1-2
# Visualizing ratings
# Plot the raw ratings using a bin width of 1
qplot(getRatings(MovieLense), binwidth = 1,
main = "Histogram of ratings", xlab = "Rating")
summary(getRatings(MovieLense)) # Ratings lean high: the mean (3.53) is below the median (4.00), i.e. left-skewed
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 1.00 3.00 4.00 3.53 4.00 5.00
@sanealytics
sanealytics / recommenderlab-1-1.R
Created June 12, 2012 14:49
Recommenderlab walkthrough 1-1
# Load required libraries
library(recommenderlab) # package being evaluated
library(ggplot2) # For plots
# Load the data we are going to work with
data(MovieLense)
# Print the object at the top level; the output is pasted below
MovieLense
# 943 x 1664 rating matrix of class ‘realRatingMatrix’ with 99392 ratings.
# Visualizing a sample of this