Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save wyukawa/1086414 to your computer and use it in GitHub Desktop.
Save wyukawa/1086414 to your computer and use it in GitHub Desktop.
みてわかる クラウド マガジン Vol.3の特集4をHiveQLで一部書いてみた
-- Lists the users whose menu ratios are closest to those of user A in
-- cafeteriahistory201007.
--
-- How it works, by subquery alias:
--   user_total : number of rows per user.
--   food_cnt   : number of rows per (user, menu).
--   o_table    : ratio = food_cnt.cnt / user_total.total for every user except A.
--   a_table    : the same ratio, restricted to user A.
--   rec        : one row per (other user, menu) where A also has that menu,
--                with diff = other user's ratio minus A's ratio.
--   dis        : sqrt(sum(diff * diff)) per user, i.e. a Euclidean distance over
--                the menus shared with A.
--
-- The output column keeps the name sim, but it is a distance: a smaller value
-- means the user is closer to A. Only users with sim below 0.1 are returned.
-- Users that share no menu with A do not appear at all.
--
-- Notes on this revision:
--   * o_table and a_table are combined with an inner join. The earlier form
--     used a left outer join, tagged unmatched rows with the string NONE inside
--     the numeric diff column, and then filtered those rows out again, which
--     relied on implicit string/double conversion to produce the same rows.
--   * The column user is quoted with backticks because USER is listed as a
--     reserved keyword in newer Hive releases.
--   * The select-star wrapper subqueries were removed and each subquery now
--     lists only the columns it needs.
--   * Comments are kept in this header, starting at column 0, so that the
--     statement body contains no comment lines.
select
    dis.`user` as `user`,
    dis.sim as sim
from
(
    select
        rec.`user` as `user`,
        sqrt(sum(rec.diff * rec.diff)) as sim
    from
    (
        select
            o_table.`user` as `user`,
            o_table.menu as menu,
            o_table.ratio - a_table.ratio as diff
        from
        (
            select
                food_cnt.`user` as `user`,
                food_cnt.menu as menu,
                food_cnt.cnt / user_total.total as ratio
            from
            (
                select
                    `user` as `user`,
                    count(*) as total
                from
                    cafeteriahistory201007
                group by
                    `user`
            ) user_total
            join
            (
                select
                    `user` as `user`,
                    menu as menu,
                    count(1) as cnt
                from
                    cafeteriahistory201007
                group by
                    `user`,
                    menu
            ) food_cnt
            on
                user_total.`user` = food_cnt.`user`
            where
                food_cnt.`user` != 'A'
        ) o_table
        join
        (
            select
                food_cnt.menu as menu,
                food_cnt.cnt / user_total.total as ratio
            from
            (
                select
                    `user` as `user`,
                    count(*) as total
                from
                    cafeteriahistory201007
                group by
                    `user`
            ) user_total
            join
            (
                select
                    `user` as `user`,
                    menu as menu,
                    count(1) as cnt
                from
                    cafeteriahistory201007
                group by
                    `user`,
                    menu
            ) food_cnt
            on
                user_total.`user` = food_cnt.`user`
            where
                food_cnt.`user` = 'A'
        ) a_table
        on
            a_table.menu = o_table.menu
    ) rec
    group by
        rec.`user`
) dis
where
    dis.sim < 0.1
;
@wyukawa
Copy link
Author

wyukawa commented Jul 16, 2011

実行するとこんな感じ

C 0.045549532874995705
E 0.047382490568478285

@wyukawa
Copy link
Author

wyukawa commented Jul 16, 2011

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment