Last active
August 29, 2015 14:05
-
-
Save Attsun1031/0979e90a95372bc562c5 to your computer and use it in GitHub Desktop.
creat hive table for ldgourmet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ratings | |
create table ratings (id int, restaurant_id int, user_id string, total int, food int, service int, atmosphere int, cost_performance int, title string, body string, purpose int, created_on timestamp) row format delimited fields terminated by ',' lines terminated by '\n'; | |
load data local inpath './ratings.csv' overwrite into table ratings; | |
# restaurants | |
create table restaurants (id int, name string, property string, alphabet string, name_kana string, pref_id int, area_id int, station_id1 int, station_time1 int, station_distance1 int, station_id2 int, station_time2 int, station_distance2 int, station_id3 int, station_time3 int, station_distance3 int, category_id1 int, category_id2 int, category_id3 int, category_id4 int, category_id5 int, zip string , address string , north_latitude float, east_longitude float, description string, purpose int, open_morning boolean, open_lunch boolean, open_late boolean, photo_count int, special_count int, menu_count int, fan_count int, access_count int, created_on timestamp, modified_on timestamp, closed boolean) row format delimited fields terminated by ',' lines terminated by '\n'; | |
load data local inpath './restaurants.csv' overwrite into table restaurants; | |
# area | |
create table areas (id int, pref_id int, name string) row format delimited fields terminated by ',' lines terminated by '\n'; | |
load data local inpath './area.csv' overwrite into table areas; | |
# 銀座・日本橋・新橋地域(aera.id = 1)にある各レストランの平均レート | |
select avg(ratings.total), restaurants.id, max(restaurants.name) from ratings join restaurants on (ratings.restaurant_id = restaurants.id) where restaurants.area_id = 1 group by restaurants.id | |
... | |
Time taken: 57.157 seconds | |
# dynamic partitionを使って既存のテーブル(area)からpartitionを持つテーブル(p_areas)を作る。 | |
create table p_areas (id int, name string) partitioned by (pref_id int) row format delimited fields terminated by ',' lines terminated by '\n'; | |
set hive.exec.dynamic.partition.mode=nonstrict; | |
set hive.exec.dynamic.partition=true; | |
from areas insert overwrite table p_areas partition(pref_id) select id, name, pref_id; | |
## partition数が大きくなる場合は以下のパラメータを検討する。 | |
## hive.exec.max.dynamic.partitions.pernode | |
## hive.exec.max.dynamic.partitions |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment