Skip to content

Instantly share code, notes, and snippets.

@dorukcan
Created May 14, 2018 12:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dorukcan/be26dc647eff42e5036e54fffdb0e2ab to your computer and use it in GitHub Desktop.
Save dorukcan/be26dc647eff42e5036e54fffdb0e2ab to your computer and use it in GitHub Desktop.
-- fill empty values with null
UPDATE movielens.movies
SET genres = NULL
WHERE genres = '(no genres listed)';
-- combine all tables into one table
CREATE MATERIALIZED VIEW movielens.combined AS
WITH _movies AS (
SELECT
movieid,
title,
regexp_split_to_array(genres, '\|') AS genres
FROM movielens.movies
)
SELECT
ratings.userid AS user_id,
ratings.movieid AS movie_id,
MAX(_movies.title) AS movie_title,
MAX(_movies.genres) AS movie_genres,
AVG(ratings.rating) AS user_rating,
array_agg(tags.tag) AS user_tags
FROM movielens.ratings
INNER JOIN movielens.tags
ON tags.userid = ratings.userid
AND tags.movieid = ratings.movieid
INNER JOIN _movies
ON _movies.movieid = ratings.movieid
GROUP BY user_id, movie_id;
-- select a movie and fetch its fields
WITH my_movie AS (
SELECT
movie_id,
movie_title,
MAX(movie_genres) as movie_genres,
UNNEST(user_tags) as user_tag
FROM movielens.combined
WHERE movie_title LIKE '%Shawshank%'
GROUP BY movie_id, movie_title, user_tag
)
SELECT
movie_id,
movie_title,
MAX(movie_genres) AS movie_genres,
ARRAY_AGG(user_tag) AS user_tags
FROM my_movie
GROUP BY movie_id, movie_title;
-- calculate weighted average of each movie
SELECT
movie_title,
COUNT(*) AS vote_count,
ROUND(AVG(user_rating), 2) AS vote_average,
ROUND((COUNT(*) :: NUMERIC / (COUNT(*) + 50) * AVG(user_rating)) + (50 :: NUMERIC / (COUNT(*) + 50) * 3.701), 2) AS score
FROM movielens.combined
GROUP BY movie_title
ORDER BY score DESC;
-- text content of movies
SELECT DISTINCT ON (movie_title)
movie_title,
array_to_string(movie_genres, ' ') AS movie_genres,
array_to_string(user_tags, ' ') AS user_tags
FROM movielens.combined
WHERE movie_genres NOTNULL;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment