Created
May 14, 2018 12:34
-
-
Save dorukcan/be26dc647eff42e5036e54fffdb0e2ab to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- fill empty values with null | |
UPDATE movielens.movies | |
SET genres = NULL | |
WHERE genres = '(no genres listed)'; | |
-- combine all tables into one table | |
CREATE MATERIALIZED VIEW movielens.combined AS | |
WITH _movies AS ( | |
SELECT | |
movieid, | |
title, | |
regexp_split_to_array(genres, '\|') AS genres | |
FROM movielens.movies | |
) | |
SELECT | |
ratings.userid AS user_id, | |
ratings.movieid AS movie_id, | |
MAX(_movies.title) AS movie_title, | |
MAX(_movies.genres) AS movie_genres, | |
AVG(ratings.rating) AS user_rating, | |
array_agg(tags.tag) AS user_tags | |
FROM movielens.ratings | |
INNER JOIN movielens.tags | |
ON tags.userid = ratings.userid | |
AND tags.movieid = ratings.movieid | |
INNER JOIN _movies | |
ON _movies.movieid = ratings.movieid | |
GROUP BY user_id, movie_id; | |
-- select a movie and fetch its fields | |
WITH my_movie AS ( | |
SELECT | |
movie_id, | |
movie_title, | |
MAX(movie_genres) as movie_genres, | |
UNNEST(user_tags) as user_tag | |
FROM movielens.combined | |
WHERE movie_title LIKE '%Shawshank%' | |
GROUP BY movie_id, movie_title, user_tag | |
) | |
SELECT | |
movie_id, | |
movie_title, | |
MAX(movie_genres) AS movie_genres, | |
ARRAY_AGG(user_tag) AS user_tags | |
FROM my_movie | |
GROUP BY movie_id, movie_title; | |
-- calculate weighted average of each movie | |
SELECT | |
movie_title, | |
COUNT(*) AS vote_count, | |
ROUND(AVG(user_rating), 2) AS vote_average, | |
ROUND((COUNT(*) :: NUMERIC / (COUNT(*) + 50) * AVG(user_rating)) + (50 :: NUMERIC / (COUNT(*) + 50) * 3.701), 2) AS score | |
FROM movielens.combined | |
GROUP BY movie_title | |
ORDER BY score DESC; | |
-- text content of movies | |
SELECT DISTINCT ON (movie_title) | |
movie_title, | |
array_to_string(movie_genres, ' ') AS movie_genres, | |
array_to_string(user_tags, ' ') AS user_tags | |
FROM movielens.combined | |
WHERE movie_genres NOTNULL; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment