Skip to content

Instantly share code, notes, and snippets.

@Attsun1031
Attsun1031 / gist:f3ad87894dc76f89e168
Created February 15, 2015 14:53
spark-summit 2014 spark streaming hands on
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.streaming._
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming.StreamingContext._
import TutorialHelper._
object Tutorial {
def main(args: Array[String]) {
// Checkpoint directory
@Attsun1031
Attsun1031 / gist:0979e90a95372bc562c5
Last active August 29, 2015 14:05
create hive table for ldgourmet
-- ratings
-- One row per user review of a restaurant, loaded from a comma-separated file.
-- Section headers use the Hive comment marker (two dashes) so this snippet can be run as a script.
create table ratings (
  id int,
  restaurant_id int,
  user_id string,
  total int,
  food int,
  service int,
  atmosphere int,
  cost_performance int,
  title string,
  body string,
  purpose int,
  created_on timestamp
)
row format delimited
  fields terminated by ','
  lines terminated by '\n';
-- The overwrite keyword replaces any rows already in the table with the contents of the local file.
load data local inpath './ratings.csv' overwrite into table ratings;
# restaurants
create table restaurants (id int, name string, property string, alphabet string, name_kana string, pref_id int, area_id int, station_id1 int, station_time1 int, station_distance1 int, station_id2 int, station_time2 int, station_distance2 int, station_id3 int, station_time3 int, station_distance3 int, category_id1 int, category_id2 int, category_id3 int, category_id4 int, category_id5 int, zip string , address string , north_latitude float, east_longitude float, description string, purpose int, open_morning boolean, open_lunch boolean, open_late boolean, photo_count int, special_count int, menu_count int, fan_count int, access_count int, created_on tim
# java
# Install Oracle Java 7 from the webupd8team PPA and persist JAVA_HOME / PATH
# for future login shells via ~/.bashrc.
sudo apt-get -y install python-software-properties
sudo add-apt-repository -y ppa:webupd8team/java
sudo apt-get update
sudo apt-get -y install oracle-java7-installer
# Single quotes keep the lines literal: $PATH and $JAVA_HOME must be expanded
# when ~/.bashrc is sourced, not now. JAVA_HOME is not set in the current shell,
# so double quotes would write "export PATH=<current PATH>:/bin".
echo 'export JAVA_HOME=/usr/lib/jvm/java-7-oracle' >> ~/.bashrc
echo 'export PATH=$PATH:$JAVA_HOME/bin' >> ~/.bashrc
# CDH4
curl -O http://archive.cloudera.com/cdh4/one-click-install/squeeze/amd64/cdh4-repository_1.0_all.deb