Skip to content

Instantly share code, notes, and snippets.

@t-redactyl
Last active December 18, 2015 02:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save t-redactyl/6b519437844f297a4eca to your computer and use it in GitHub Desktop.
Save t-redactyl/6b519437844f297a4eca to your computer and use it in GitHub Desktop.
MySQL code for the blog post: Finding the highest rated Christmas movies in MovieLens 10M (23/12/2015)
-- Create tables
DROP TABLE IF EXISTS ratingsdata;
CREATE TABLE ratingsdata (
userid INT,
itemid INT,
rating INT,
timestamp INT,
PRIMARY KEY (userid, itemid));
DROP TABLE IF EXISTS movies;
CREATE TABLE movies (
itemid INT PRIMARY KEY,
title TEXT,
genres TEXT,
action INT DEFAULT 0,
adventure INT DEFAULT 0,
animation INT DEFAULT 0,
childrens INT DEFAULT 0,
comedy INT DEFAULT 0,
crime INT DEFAULT 0,
documentary INT DEFAULT 0,
drama INT DEFAULT 0,
fantasy INT DEFAULT 0,
noir INT DEFAULT 0,
horror INT DEFAULT 0,
musical INT DEFAULT 0,
mystery INT DEFAULT 0,
romance INT DEFAULT 0,
scifi INT DEFAULT 0,
thriller INT DEFAULT 0,
war INT DEFAULT 0,
western INT DEFAULT 0);
-- Create table from web scraped list of Christmas movies
DROP TABLE IF EXISTS christmas;
CREATE TABLE christmas (
name TEXT);
-- Create indexes
CREATE INDEX usersdata_index ON ratingsdata (userid);
CREATE INDEX itemsdata_index ON ratingsdata (itemid);
-- Loading in data
LOAD DATA LOCAL INFILE 'path/to/data/ratings.dat'
INTO TABLE ratingsdata
FIELDS TERMINATED BY '::';
LOAD DATA LOCAL INFILE 'path/to/data/movies.dat'
INTO TABLE movies
FIELDS TERMINATED BY '::';
-- Import web scraped list of Christmas movies to table
LOAD DATA LOCAL INFILE 'path/to/data/christmas_movies.txt'
INTO TABLE christmas;
-- Query to merge the top Christmas movies with the MovieLens 10M list
SELECT COUNT(name)
FROM (
SELECT *
FROM christmas
INNER JOIN movies
ON UPPER(movies.title)
LIKE CONCAT('%', UPPER(christmas.name), '%')
) AS counts;
-- 35 of the 50 Christmas movies were found.
-- Christmas movies IDs table.
DROP TABLE IF EXISTS christmasids;
CREATE TABLE christmasids (
itemid INT);
INSERT INTO christmasids
SELECT movies.itemid
FROM christmas
INNER JOIN movies
ON UPPER(movies.title)
LIKE CONCAT('%', UPPER(christmas.name), '%');
-- Update the movie table by filling in genre binaries
-- Action
UPDATE movies
SET action = 1
WHERE genres LIKE '%Action%';
-- Adventure
UPDATE movies
SET adventure = 1
WHERE genres LIKE '%Adventure%';
-- Animation
UPDATE movies
SET animation = 1
WHERE genres LIKE '%Animation%';
-- Children's
UPDATE movies
SET childrens = 1
WHERE genres LIKE '%Children%';
-- Comedy
UPDATE movies
SET comedy = 1
WHERE genres LIKE '%Comedy%';
-- Crime
UPDATE movies
SET crime = 1
WHERE genres LIKE '%Crime%';
-- Documentary
UPDATE movies
SET documentary = 1
WHERE genres LIKE '%Documentary%';
-- Drama
UPDATE movies
SET drama = 1
WHERE genres LIKE '%Drama%';
-- Fantasy
UPDATE movies
SET fantasy = 1
WHERE genres LIKE '%Fantasy%';
-- Noir
UPDATE movies
SET noir = 1
WHERE genres LIKE '%Noir%';
-- Horror
UPDATE movies
SET horror = 1
WHERE genres LIKE '%Horror%';
-- Musical
UPDATE movies
SET musical = 1
WHERE genres LIKE '%Musical%';
-- Mystery
UPDATE movies
SET mystery = 1
WHERE genres LIKE '%Mystery%';
-- Romance
UPDATE movies
SET romance = 1
WHERE genres LIKE '%Romance%';
-- Sci-Fi
UPDATE movies
SET scifi = 1
WHERE genres LIKE '%Sci-Fi%';
-- Thriller
UPDATE movies
SET thriller = 1
WHERE genres LIKE '%Thriller%';
-- War
UPDATE movies
SET war = 1
WHERE genres LIKE '%War%';
-- Western
UPDATE movies
SET western = 1
WHERE genres LIKE '%Western%';
-- Fix messed up special characters in "Joyeux Noël"
UPDATE movies
SET title = REPLACE(title,'Merry Christmas (Joyeux Noël) (2005)',
'Merry Christmas (Joyeux Noël) (2005)');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment