Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Book-Crossing Dataset
-- 1.1.1 登録ユーザー数, アクティブユーザー数 --
td query -w -d book_crossing_dataset "
SELECT t1.cnt AS all_users, t2.cnt AS active_users, ROUND(t2.cnt/t1.cnt*100) AS active_rate
FROM
(
SELECT COUNT(distinct user_id) as cnt, 1 AS one
FROM users
) t1
JOIN
(
SELECT COUNT(distinct user_id) as cnt, 1 AS one
FROM ratings
) t2
ON
(t1.one=t2.one)
"
-- 1.1.2 片方のテーブルにしか含まれないユーザー数 --
td query -w -d book_crossing_dataset "
SELECT t AS type, cnt
FROM
(
SELECT COUNT(*) AS cnt, 'only in users' AS t
FROM
(
SELECT user_id
FROM users
GROUP BY user_id
) t1
LEFT OUTER JOIN
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
WHERE t2.user_id IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'only in ratings' AS t
FROM
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t1
LEFT OUTER JOIN
(
SELECT user_id
FROM users
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
WHERE t2.user_id IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'both' AS t
FROM
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t1
JOIN
(
SELECT user_id
FROM users
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
) o1
"
-- 1.2.1 登録ブック数, アクティブブック数 --
td query -w -d book_crossing_dataset "
SELECT t1.cnt AS all_books, t2.cnt AS active_books, ROUND(t2.cnt/t1.cnt*100) AS active_rate
FROM
(
SELECT COUNT(distinct isbn) as cnt, 1 AS one
FROM books
) t1
JOIN
(
SELECT COUNT(distinct isbn) as cnt, 1 AS one
FROM ratings
) t2
ON
(t1.one=t2.one)
"
-- 1.2.2 片方のテーブルにしか含まれないブック数 --
td query -w -d book_crossing_dataset "
SELECT t AS type, cnt
FROM
(
SELECT COUNT(*) AS cnt, 'only in books' AS t
FROM
(
SELECT isbn
FROM books
GROUP BY isbn
) t1
LEFT OUTER JOIN
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
WHERE t2.isbn IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'only in ratings' AS t
FROM
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t1
LEFT OUTER JOIN
(
SELECT isbn
FROM books
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
WHERE t2.isbn IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'both' AS t
FROM
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t1
JOIN
(
SELECT isbn
FROM books
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
) o1
"
-- 1.3.1 総評価数 --
td query -w -d book_crossing_dataset "
SELECT COUNT(*) AS all_reviews
FROM ratings
"
-- 1.3.2 有効評価数,平均評価値 --
td query -w -d book_crossing_dataset "
SELECT COUNT(*) AS valid_reviews, ROUND(AVG(book_rating)*100)/100 AS avg_of_reviews
FROM ratings
WHERE 0 < book_rating
"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.