Created
July 2, 2012 13:52
-
-
Save doryokujin/3033351 to your computer and use it in GitHub Desktop.
Book-Crossing Dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 5.4.1 一冊当たりの評価数が多いブックTOP20 -- | |
td query -w -d book_crossing_dataset " | |
SELECT t1.isbn AS isbn, title, year_of_publication AS year, rating_cnt, ROUND(avg*100)/100 AS avg, ROUND(stddev*100)/100 AS stddev | |
FROM | |
( | |
SELECT isbn, COUNT(book_rating) AS rating_cnt, AVG(book_rating) AS avg, STDDEV_SAMP(book_rating) AS stddev | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn | |
) t1 | |
JOIN | |
( | |
SELECT isbn, | |
book_title AS title, | |
year_of_publication | |
FROM books | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
ORDER BY rating_cnt DESC | |
LIMIT 20 | |
" | |
-- 5.4.2 評価数 TOP5 の評価分布 WHERE 0<Rating -- | |
td query -w -d book_crossing_dataset -f csv -o book_dist_of_top5_review_cnt.csv " | |
SELECT title, book_rating, cnt, total_cnt, ROUND(cnt/total_cnt*1000)/10 AS rate | |
FROM | |
( | |
SELECT t1.isbn AS isbn, book_rating, COUNT(*) AS cnt, total_cnt | |
FROM ratings t1 | |
JOIN | |
( | |
SELECT isbn, COUNT(book_rating) AS total_cnt | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn | |
ORDER BY total_cnt DESC | |
LIMIT 5 | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE 0 < book_rating | |
GROUP BY t1.isbn, book_rating, total_cnt | |
) o1 | |
JOIN | |
( | |
SELECT isbn, | |
book_title AS title | |
FROM books | |
) o2 | |
ON | |
(o1.isbn=o2.isbn) | |
ORDER BY total_cnt DESC, book_rating | |
" | |
-- 5.5.1 平均が高かったブックベスト20 (50<=cnt) -- | |
td query -w -d book_crossing_dataset " | |
SELECT t1.isbn AS isbn, title, year_of_publication AS year, ROUND(avg*100)/100 AS avg, cnt, ROUND(stddev*100)/100 AS stddev | |
FROM | |
( | |
SELECT isbn, AVG(book_rating) AS avg, COUNT(book_rating) AS cnt, STDDEV_SAMP(book_rating) AS stddev | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn | |
HAVING 50 <= COUNT(book_rating) | |
) t1 | |
JOIN | |
( | |
SELECT isbn, | |
year_of_publication, | |
book_title AS title | |
FROM books | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
ORDER BY avg DESC | |
LIMIT 20 | |
" | |
-- 5.5.2 評価平均値 TOP5 の評価分布 WHERE 0<Rating -- | |
td query -w -d book_crossing_dataset -f csv -o book_dist_of_top5_review_avg.csv " | |
SELECT title, book_rating, avg, cnt, total_cnt, ROUND(cnt/total_cnt*1000)/10 AS rate | |
FROM | |
( | |
SELECT t1.isbn AS isbn, book_rating, ROUND(avg/100)*100 AS avg, COUNT(*) AS cnt, total_cnt | |
FROM ratings t1 | |
JOIN | |
( | |
SELECT isbn, AVG(book_rating) AS avg, COUNT(book_rating) AS total_cnt | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn | |
HAVING 50 <= COUNT(book_rating) | |
ORDER BY avg DESC | |
LIMIT 5 | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE 0 < book_rating | |
GROUP BY t1.isbn, book_rating, total_cnt, avg | |
) o1 | |
JOIN | |
( | |
SELECT isbn, | |
book_title AS title | |
FROM books | |
) o2 | |
ON | |
(o1.isbn=o2.isbn) | |
ORDER BY avg DESC, book_rating | |
" | |
-- 5.6.1 年代別:平均が高かったブックTOP1 (50<=cnt) -- | |
td query -w -d book_crossing_dataset " | |
SELECT o1.year_of_publication AS year, title, ROUND(max_this_year*100)/100 AS avg, cnt, ROUND(stddev*100)/100 AS stddev | |
FROM | |
( | |
SELECT year_of_publication, MAX(avg) as max_this_year | |
FROM | |
( | |
SELECT isbn, AVG(book_rating) AS avg, COUNT(book_rating) AS cnt | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn | |
HAVING 50 <= COUNT(book_rating) | |
) t1 | |
JOIN | |
( | |
SELECT isbn, | |
year_of_publication | |
FROM books | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE 1994 <= year_of_publication | |
AND year_of_publication <= 2004 | |
GROUP BY year_of_publication | |
) o1 | |
JOIN | |
( | |
SELECT year_of_publication, title, avg, cnt, stddev | |
FROM | |
( | |
SELECT isbn, AVG(book_rating) AS avg, COUNT(book_rating) AS cnt, STDDEV_SAMP(book_rating) AS stddev | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn | |
HAVING 50 <= COUNT(book_rating) | |
) t1 | |
JOIN | |
( | |
SELECT isbn, | |
year_of_publication, | |
book_title AS title | |
FROM books | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE 1994 <= year_of_publication | |
AND year_of_publication <= 2004 | |
) o2 | |
ON | |
(o1.max_this_year=o2.avg AND o1.year_of_publication=o2.year_of_publication) | |
ORDER BY year DESC | |
" | |
-- 5.6.2 年代別:平均が高かったブックTOP1の分布 (50<=cnt) -- | |
td query -w -d book_crossing_dataset -f csv -o book_dist_of_top1_review_avg_by_year.csv " | |
SELECT o1.year_of_publication AS year, title, ROUND(max_this_year*100)/100 AS avg, booK_rating, rating_cnt, cnt, ROUND(rating_cnt/cnt*1000)/10 AS rate | |
FROM | |
( | |
SELECT year_of_publication, MAX(avg) as max_this_year | |
FROM | |
( | |
SELECT isbn, AVG(book_rating) AS avg, COUNT(book_rating) AS cnt | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn | |
HAVING 50 <= COUNT(book_rating) | |
) t1 | |
JOIN | |
( | |
SELECT isbn, | |
year_of_publication | |
FROM books | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE 1994 <= year_of_publication | |
AND year_of_publication <= 2004 | |
GROUP BY year_of_publication | |
) o1 | |
JOIN | |
( | |
SELECT year_of_publication, t1.isbn AS isbn, title, avg, cnt, stddev | |
FROM | |
( | |
SELECT isbn, AVG(book_rating) AS avg, COUNT(book_rating) AS cnt, STDDEV_SAMP(book_rating) AS stddev | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn | |
HAVING 50 <= COUNT(book_rating) | |
) t1 | |
JOIN | |
( | |
SELECT isbn, | |
year_of_publication, | |
v['book_title'] AS title | |
FROM books | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE 1994 <= year_of_publication | |
AND year_of_publication <= 2004 | |
) o2 | |
ON | |
(o1.max_this_year=o2.avg AND o1.year_of_publication=o2.year_of_publication) | |
JOIN | |
( | |
SELECT isbn, book_rating, COUNT(*) AS rating_cnt | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY isbn, book_rating | |
) o3 | |
ON | |
(o2.isbn=o3.isbn) | |
ORDER BY year DESC, book_rating | |
" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment