Skip to content

Instantly share code, notes, and snippets.

@doryokujin
Created June 28, 2012 01:25
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save doryokujin/3008099 to your computer and use it in GitHub Desktop.
Save doryokujin/3008099 to your computer and use it in GitHub Desktop.
Book-Crossing Dataset
-- 1.1.1 登録ユーザー数, アクティブユーザー数 --
td query -w -d book_crossing_dataset "
SELECT t1.cnt AS all_users, t2.cnt AS active_users, ROUND(t2.cnt/t1.cnt*100) AS active_rate
FROM
(
SELECT COUNT(distinct user_id) as cnt, 1 AS one
FROM users
) t1
JOIN
(
SELECT COUNT(distinct user_id) as cnt, 1 AS one
FROM ratings
) t2
ON
(t1.one=t2.one)
"
-- 1.1.2 片方のテーブルにしか含まれないユーザー数 --
td query -w -d book_crossing_dataset "
SELECT t AS type, cnt
FROM
(
SELECT COUNT(*) AS cnt, 'only in users' AS t
FROM
(
SELECT user_id
FROM users
GROUP BY user_id
) t1
LEFT OUTER JOIN
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
WHERE t2.user_id IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'only in ratings' AS t
FROM
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t1
LEFT OUTER JOIN
(
SELECT user_id
FROM users
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
WHERE t2.user_id IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'both' AS t
FROM
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t1
JOIN
(
SELECT user_id
FROM users
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
) o1
"
-- 1.2.1 登録ブック数, アクティブブック数 --
td query -w -d book_crossing_dataset "
SELECT t1.cnt AS all_books, t2.cnt AS active_books, ROUND(t2.cnt/t1.cnt*100) AS active_rate
FROM
(
SELECT COUNT(distinct isbn) as cnt, 1 AS one
FROM books
) t1
JOIN
(
SELECT COUNT(distinct isbn) as cnt, 1 AS one
FROM ratings
) t2
ON
(t1.one=t2.one)
"
-- 1.2.2 片方のテーブルにしか含まれないブック数 --
td query -w -d book_crossing_dataset "
SELECT t AS type, cnt
FROM
(
SELECT COUNT(*) AS cnt, 'only in books' AS t
FROM
(
SELECT isbn
FROM books
GROUP BY isbn
) t1
LEFT OUTER JOIN
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
WHERE t2.isbn IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'only in ratings' AS t
FROM
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t1
LEFT OUTER JOIN
(
SELECT isbn
FROM books
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
WHERE t2.isbn IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'both' AS t
FROM
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t1
JOIN
(
SELECT isbn
FROM books
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
) o1
"
-- 1.3.1 総評価数 --
td query -w -d book_crossing_dataset "
SELECT COUNT(*) AS all_reviews
FROM ratings
"
-- 1.3.2 有効評価数,平均評価値 --
td query -w -d book_crossing_dataset "
SELECT COUNT(*) AS valid_reviews, ROUND(AVG(book_rating)*100)/100 AS avg_of_reviews
FROM ratings
WHERE 0 < book_rating
"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment