Skip to content

Instantly share code, notes, and snippets.

@doryokujin
Created June 28, 2012 05:39
Show Gist options
  • Save doryokujin/3009346 to your computer and use it in GitHub Desktop.
Save doryokujin/3009346 to your computer and use it in GitHub Desktop.
Book-Crossing Dataset
-- 2.1.1 ユーザーの居住国分布 --
td query -w -d book_crossing_dataset -f csv -o user_dist_of_country.csv "
SELECT country, cnt, ROUND(cnt/total_cnt*10000)/100 AS rate
FROM
(
SELECT country, COUNT(*) as cnt, 1 AS one
FROM users
WHERE country != '' AND country != 'n/a'
GROUP BY country
ORDER BY cnt DESC
LIMIT 30
) t1
JOIN
(
SELECT COUNT(country) as total_cnt, 1 AS one
FROM users
WHERE country != '' AND country != 'n/a'
) t2
ON
(t1.one=t2.one)
"
-- 2.1.2 ユーザーの居住地域分布 --
td query -w -d book_crossing_dataset -f csv -o user_dist_of_location.csv "
SELECT location1 AS state, cnt, ROUND(cnt/total_cnt*10000)/100 AS rate
FROM
(
SELECT location1, COUNT(*) as cnt, 1 AS one
FROM users
WHERE country = 'usa'
GROUP BY location1
ORDER BY cnt DESC
LIMIT 30
) t1
JOIN
(
SELECT COUNT(*) as total_cnt, 1 AS one
FROM users
WHERE country = 'usa'
) t2
ON
(t1.one=t2.one)
"
-- 2.2.1 ユーザーの年代分布 --
td query -w -d book_crossing_dataset "
SELECT generation, cnt
FROM
(
SELECT CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation, COUNT(*) as cnt
FROM users
WHERE IF(age='NULL',0,CAST(age AS INT)) <= 65
GROUP BY CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5
UNION ALL
SELECT CAST(65 AS BIGINT) AS generation, COUNT(*) as cnt
FROM users
WHERE 65 < CAST(age AS INT)
) t1
ORDER BY generation
"
-- 2.2.2 ユーザーの年代分布 --
td query -w -d book_crossing_dataset -f csv -o user_dist_of_generation.csv "
SELECT generation, cnt, ROUND(cnt/total_cnt*1000)/10 AS rate
FROM
(
SELECT CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation, COUNT(*) as cnt, 1 AS one
FROM users
WHERE 10 < IF(age='NULL',0,CAST(age AS INT))
AND IF(age='NULL',0,CAST(age AS INT)) <= 65
GROUP BY CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5
) t1
JOIN
(
SELECT COUNT(*) as total_cnt, 1 AS one
FROM users
WHERE 10 < IF(age='NULL',0,CAST(age AS INT))
AND IF(age='NULL',0,CAST(age AS INT)) <= 65
) t2
ON
(t1.one=t2.one)
ORDER BY generation
"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment