Skip to content

Instantly share code, notes, and snippets.

@doryokujin
Created July 2, 2012 17:18
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save doryokujin/3034398 to your computer and use it in GitHub Desktop.
Save doryokujin/3034398 to your computer and use it in GitHub Desktop.
Book-Crossing Dataset
-- 6.1 共起分析:Simpson係数の高いブックペア上位20 --
td query -w -d book_crossing_dataset "
SELECT r2.title AS title1,
r2.year_of_publication AS year1,
r2.cnt AS cnt1,
r3.title AS title2,
r3.year_of_publication AS year2,
r3.cnt AS cnt2,
r1.cnt AS intersection,
r1.cnt/IF(r2.cnt<r3.cnt,r2.cnt,r3.cnt)*100 AS simpson
FROM
(
SELECT o1.isbn AS isbn1, o2.isbn AS isbn2, COUNT(*) AS cnt
FROM
(
SELECT t1.isbn AS isbn, user_id
FROM
(
SELECT isbn, user_id
FROM ratings
GROUP BY isbn, user_id
) t1
JOIN
(
SELECT isbn, COUNT(book_rating)
FROM ratings
GROUP BY isbn
HAVING 500 < COUNT(book_rating)
) t2
ON
(t1.isbn=t2.isbn)
) o1
JOIN
(
SELECT t1.isbn AS isbn, user_id
FROM
(
SELECT isbn, user_id
FROM ratings
GROUP BY isbn, user_id
) t1
JOIN
(
SELECT isbn, COUNT(book_rating)
FROM ratings
GROUP BY isbn
HAVING 500 < COUNT(book_rating)
) t2
ON
(t1.isbn=t2.isbn)
) o2
ON
(o1.user_id=o2.user_id)
GROUP BY o1.isbn, o2.isbn
HAVING o1.isbn != o2.isbn
) r1
JOIN
(
SELECT t1.isbn AS isbn, year_of_publication,title, cnt
FROM
(
SELECT isbn, COUNT(book_rating) AS cnt
FROM ratings
GROUP BY isbn
) t1
JOIN
(
SELECT isbn,
year_of_publication,
v['book_title'] AS title
FROM books
) t2
ON
(t1.isbn=t2.isbn)
) r2
ON
(r1.isbn1=r2.isbn)
JOIN
(
SELECT t1.isbn AS isbn, year_of_publication,title, cnt
FROM
(
SELECT isbn, COUNT(book_rating) AS cnt
FROM ratings
GROUP BY isbn
) t1
JOIN
(
SELECT isbn,
year_of_publication,
v['book_title'] AS title
FROM books
) t2
ON
(t1.isbn=t2.isbn)
) r3
ON
(r1.isbn2=r3.isbn)
ORDER BY simpson DESC
LIMIT 40
"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment