Skip to content

Instantly share code, notes, and snippets.

@Attsun1031
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Attsun1031/0979e90a95372bc562c5 to your computer and use it in GitHub Desktop.
Save Attsun1031/0979e90a95372bc562c5 to your computer and use it in GitHub Desktop.
creat hive table for ldgourmet
# ratings
create table ratings (id int, restaurant_id int, user_id string, total int, food int, service int, atmosphere int, cost_performance int, title string, body string, purpose int, created_on timestamp) row format delimited fields terminated by ',' lines terminated by '\n';
load data local inpath './ratings.csv' overwrite into table ratings;
# restaurants
create table restaurants (id int, name string, property string, alphabet string, name_kana string, pref_id int, area_id int, station_id1 int, station_time1 int, station_distance1 int, station_id2 int, station_time2 int, station_distance2 int, station_id3 int, station_time3 int, station_distance3 int, category_id1 int, category_id2 int, category_id3 int, category_id4 int, category_id5 int, zip string , address string , north_latitude float, east_longitude float, description string, purpose int, open_morning boolean, open_lunch boolean, open_late boolean, photo_count int, special_count int, menu_count int, fan_count int, access_count int, created_on timestamp, modified_on timestamp, closed boolean) row format delimited fields terminated by ',' lines terminated by '\n';
load data local inpath './restaurants.csv' overwrite into table restaurants;
# area
create table areas (id int, pref_id int, name string) row format delimited fields terminated by ',' lines terminated by '\n';
load data local inpath './area.csv' overwrite into table areas;
# 銀座・日本橋・新橋地域(aera.id = 1)にある各レストランの平均レート
select avg(ratings.total), restaurants.id, max(restaurants.name) from ratings join restaurants on (ratings.restaurant_id = restaurants.id) where restaurants.area_id = 1 group by restaurants.id
...
Time taken: 57.157 seconds
# dynamic partitionを使って既存のテーブル(area)からpartitionを持つテーブル(p_areas)を作る。
create table p_areas (id int, name string) partitioned by (pref_id int) row format delimited fields terminated by ',' lines terminated by '\n';
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
from areas insert overwrite table p_areas partition(pref_id) select id, name, pref_id;
## partition数が大きくなる場合は以下のパラメータを検討する。
## hive.exec.max.dynamic.partitions.pernode
## hive.exec.max.dynamic.partitions
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment