Skip to content

Instantly share code, notes, and snippets.

@mac2000
Created August 13, 2017 17:44
Show Gist options
  • Save mac2000/9c3943e31453db60aaff7d94143e50be to your computer and use it in GitHub Desktop.
Save mac2000/9c3943e31453db60aaff7d94143e50be to your computer and use it in GitHub Desktop.
sql 2017 graph demo
-- docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=P@ssword" -e "MSSQL_PID=Developer" --cap-add SYS_PTRACE -p 1433:1433 --name sql -it --rm -v C:/Users/AlexandrM/Downloads/ml-100k:/ml-100k -v C:/Users/AlexandrM/Downloads/ml-20m:/ml-20m microsoft/mssql-server-linux
-- http://files.grouplens.org/datasets/movielens/ml-20m.zip
use master;
go
DROP DATABASE movies;
GO
-- Create a graph demo database
CREATE DATABASE movies;
go
USE movies;
go
-- u.user
-- user id | age | gender | occupation | zip code
-- 1|24|M|technician|85711
DROP TABLE IF EXISTS users_temp;
CREATE TABLE users_temp (user_id INT PRIMARY KEY, age INT, gender VARCHAR(1), occupation VARCHAR(100), zip_code VARCHAR(100));
BULK INSERT users_temp
FROM '/ml-100k/u.user'
WITH (DATAFILETYPE = 'char', FIRSTROW = 1, FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', TABLOCK)
GO
DROP TABLE IF EXISTS users;
CREATE TABLE users (user_id INT PRIMARY KEY, age INT, gender VARCHAR(1), occupation VARCHAR(100), zip_code VARCHAR(100)) AS NODE;
INSERT INTO users SELECT user_id, age, gender, occupation, zip_code FROM users_temp
DROP TABLE IF EXISTS users_temp;
-- u.genre
-- genre | id
-- Action|0
DROP TABLE IF EXISTS genres_temp;
CREATE TABLE genres_temp (genre VARCHAR(50), genre_id INT PRIMARY KEY);
BULK INSERT genres_temp
FROM '/ml-100k/u.genre'
WITH (DATAFILETYPE = 'char', FIRSTROW = 1, FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', TABLOCK)
GO
DROP TABLE IF EXISTS genres;
CREATE TABLE genres (genre_id INT PRIMARY KEY, genre VARCHAR(50)) AS NODE;
INSERT INTO genres SELECT genre_id, genre FROM genres_temp
DROP TABLE IF EXISTS genres_temp;
-- u.item
-- movie id | movie title | release date | video release date | IMDb URL | unknown | Action | Adventure | Animation | Children's | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western
-- 1|Toy Story (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0
DROP TABLE IF EXISTS item_temp;
CREATE TABLE item_temp (movie_id INT PRIMARY KEY, movie_title VARCHAR(100), release_date SMALLDATETIME, video_release_date SMALLDATETIME, imdb_url VARCHAR(500), genre_0 INT, genre_1 INT, genre_2 INT, genre_3 INT, genre_4 INT, genre_5 INT, genre_6 INT, genre_7 INT, genre_8 INT, genre_9 INT, genre_10 INT, genre_11 INT, genre_12 INT, genre_13 INT, genre_14 INT, genre_15 INT, genre_16 INT, genre_17 INT, genre_18 INT);
BULK INSERT item_temp
FROM '/ml-100k/u.item'
WITH (DATAFILETYPE = 'char', FIRSTROW = 1, FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', TABLOCK)
GO
DROP TABLE IF EXISTS movies;
CREATE TABLE movies (movie_id INT PRIMARY KEY, movie_title VARCHAR(100), release_date SMALLDATETIME, video_release_date SMALLDATETIME, imdb_url VARCHAR(500)) AS NODE;
INSERT INTO movies SELECT movie_id, movie_title, release_date, video_release_date, imdb_url FROM item_temp
-- u.data
-- user id | item id | rating | timestamp
-- 196 242 3 881250949
DROP TABLE IF EXISTS ratings_temp;
CREATE TABLE ratings_temp (user_id INT, movie_id INT, rating INT, timestamp BIGINT);
GO
BULK INSERT ratings_temp
FROM '/ml-100k/u.data'
WITH (DATAFILETYPE = 'char', FIRSTROW = 1, FIELDTERMINATOR = '\t', ROWTERMINATOR = '0x0a', TABLOCK)
GO
DROP TABLE IF EXISTS ratings;
CREATE TABLE ratings (rating INTEGER, date SMALLDATETIME) AS EDGE;
INSERT INTO ratings
select U.$node_id, M.$node_id, rating, DATEADD(second, ([timestamp] - DATEDIFF(second,GETDATE(),GETUTCDATE())), CAST('1970-01-01 00:00:00' AS datetime)) AS date from ratings_temp as T join movies M ON T.movie_id = M.movie_id join users U ON T.user_id = U.user_id
DROP TABLE IF EXISTS ratings_temp;
DROP TABLE IF EXISTS belongs;
CREATE TABLE belongs AS EDGE;
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 0 where genre_0 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 1 where genre_1 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 2 where genre_2 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 3 where genre_3 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 4 where genre_4 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 5 where genre_5 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 6 where genre_6 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 7 where genre_7 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 8 where genre_8 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 9 where genre_9 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 10 where genre_10 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 11 where genre_11 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 12 where genre_12 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 13 where genre_13 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 14 where genre_14 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 15 where genre_15 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 16 where genre_16 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 17 where genre_17 = 1
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 18 where genre_18 = 1
DROP TABLE IF EXISTS item_temp;
-- Get movies belongs to Sci-Fi aka join
select m.movie_id, m.movie_title, g.genre from movies AS m, genres AS g, belongs AS b where match(m-(b)->g) and g.genre = 'Sci-Fi'
-- Kind of top rated movies
select m.movie_title, sum(r.rating) / count(r.rating) AS avg, sum(r.rating) as sum, count(r.rating) as count from movies as m, ratings as r, users as u where match(m<-(r)-u) group by m.movie_title order by avg desc, count desc
-- 257 men in black - other users who also like man in the black
select u.user_id, u.age, u.gender, u.occupation, r.rating, r.date from users u, movies m, ratings r where match(u-(r)->m) and m.movie_id = 257
-- user 472 favorite genres (aka user profile)
select u.user_id, /*u.age, u.gender, u.occupation,*/ g.genre, count(r.rating) as four_plus_ratings_count from users u, ratings r, movies m, genres g, belongs b where match (u-(r)->m-(b)->g) and r.rating >= 4
and u.user_id = 472
group by u.user_id, /*u.age, u.gender, u.occupation,*/ g.genre
order by four_plus_ratings_count desc
-- coloborative recomendation
SELECT
TOP 10
similar.movie_id,
similar.movie_title,
COUNT(*) as total_ratings
from
movies as my_movie,
users as U,
ratings as ratings_other,
ratings as ratings_this,
movies as similar
where my_movie.movie_id = 257 and match(similar<-(ratings_other)-U-(ratings_this)->my_movie)
GROUP BY similar.movie_id, similar.movie_title
ORDER BY COUNT(*) DESC
-- similar users
SELECT
me.user_id AS me,
others.user_id AS other_id,
others.age,
others.gender,
others.occupation,
my_movies.movie_id,
my_movies.movie_title,
my_ratings.rating as my_rating,
other_ratings.rating as others_rating,
ABS(my_ratings.rating-other_ratings.rating) as delta
from
users as me,
ratings as my_ratings,
movies as my_movies,
ratings as other_ratings,
users as others
where match(me-(my_ratings)->my_movies<-(other_ratings)-others) and me.user_id = 472
order by delta
-- top 10 similar users, grouped
select top 10 me, other_id, sum(delta) as delta from (
SELECT
me.user_id AS me,
others.user_id AS other_id,
ABS(my_ratings.rating-other_ratings.rating) as delta
from
users as me,
ratings as my_ratings,
movies as my_movies,
ratings as other_ratings,
users as others
where match(me-(my_ratings)->my_movies<-(other_ratings)-others) and me.user_id = 472
) as q group by me, other_id
order by delta
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment