Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Script to manually install example dataset for Cloudera Live + Tableau
# Configuration.
#   MIRROR_IP   - host serving the sample CSV files; the default is valid for
#                 Cloudera Live 1.2.0 (aka "v3").
#   DATANODE_IP - must point to a DataNode; it is the address given to
#                 impala-shell -i when the tables are created.
# Either value may be overridden from the environment; the defaults are the
# values this script has always used.
MIRROR_IP=${MIRROR_IP:-208.113.126.57}
DATANODE_IP=${DATANODE_IP:-127.0.0.1}

# Download the four sample CSV files into /tmp.
# -f makes curl return a non-zero status on an HTTP error instead of saving
# the server's error page as if it were CSV data. Stop at the first failed
# download so a bad or empty file is never loaded into HDFS.
for csv in Dates Stores Items POS; do
    if ! curl -f "http://${MIRROR_IP}/files/samples/pos/${csv}.csv" > "/tmp/${csv}.csv"; then
        echo "Failed to download ${csv}.csv from ${MIRROR_IP}" >&2
        exit 1
    fi
done
# Write the Impala DDL that (re)creates the four sample tables over the
# pipe-delimited files under /user/cloudera/sample_data.
#
# The tables are declared EXTERNAL because their data files are uploaded to
# these directories separately. Dropping a non-external table also removes the
# files in its LOCATION, so on a re-run the DROP TABLE statements would delete
# the freshly uploaded data; dropping an EXTERNAL table leaves the files alone.
#
# The heredoc delimiter is quoted so the SQL is written out verbatim, with no
# shell expansion.
cat > /tmp/load_pos.sql <<'EOF'
DROP TABLE IF EXISTS POS;
CREATE EXTERNAL TABLE POS (
    Date_Key int,
    Item_Key int,
    Store_Key int,
    Units int,
    Dollars float
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
LOCATION '/user/cloudera/sample_data/pos';

DROP TABLE IF EXISTS DATES;
CREATE EXTERNAL TABLE DATES (
    Date_Key int,
    Char_Date string,
    Dow string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
LOCATION '/user/cloudera/sample_data/dates';

DROP TABLE IF EXISTS STORES;
CREATE EXTERNAL TABLE STORES (
    Store_Key int,
    Store_Number int,
    Banner string,
    State string,
    Channel string,
    Goal int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
LOCATION '/user/cloudera/sample_data/stores';

DROP TABLE IF EXISTS ITEMS;
CREATE EXTERNAL TABLE ITEMS (
    Item_Key int,
    Category string,
    Segment string,
    Brand string,
    Flavor string,
    Package_Size string,
    Color string,
    MSRP float,
    Wholesale float,
    Item_Description string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
LOCATION '/user/cloudera/sample_data/items';
EOF
# Write the loader script that is run afterwards as the hdfs user.
# The heredoc delimiter is deliberately left unquoted: ${DATANODE_IP} is
# expanded now, while the file is generated, so the generated script contains
# the literal address and does not depend on the variable being set later.
cat > /tmp/load_pos.sh <<EOF
#!/bin/sh
# Generated loader: stage the sample CSV files in HDFS, then create the tables.
# The interpreter line above makes this file directly executable instead of
# relying on the caller to cope with a script that has none.

# Create the table directories (no-op when they already exist).
hdfs dfs -mkdir -p /user/cloudera/sample_data/pos
hdfs dfs -mkdir -p /user/cloudera/sample_data/dates
hdfs dfs -mkdir -p /user/cloudera/sample_data/stores
hdfs dfs -mkdir -p /user/cloudera/sample_data/items

# Remove files left by a previous run. The globs are quoted so that they are
# matched by HDFS and never expanded against the local filesystem; -f keeps an
# empty directory (first run) from being reported as an error.
hdfs dfs -rm -f '/user/cloudera/sample_data/pos/*'
hdfs dfs -rm -f '/user/cloudera/sample_data/dates/*'
hdfs dfs -rm -f '/user/cloudera/sample_data/stores/*'
hdfs dfs -rm -f '/user/cloudera/sample_data/items/*'

# Upload the downloaded CSV files into their table directories.
hdfs dfs -put /tmp/Dates.csv /user/cloudera/sample_data/dates
hdfs dfs -put /tmp/Stores.csv /user/cloudera/sample_data/stores
hdfs dfs -put /tmp/Items.csv /user/cloudera/sample_data/items
hdfs dfs -put /tmp/POS.csv /user/cloudera/sample_data/pos

# Run the DDL; standard output of impala-shell is kept in /tmp/pos_loader.log.
/usr/bin/impala-shell -i ${DATANODE_IP} -f /tmp/load_pos.sql > /tmp/pos_loader.log
EOF
# Give both generated files to hdfs:hdfs with mode 755, then run the loader
# script as the hdfs user.
chown hdfs:hdfs /tmp/load_pos.sql /tmp/load_pos.sh
chmod 755 /tmp/load_pos.sql /tmp/load_pos.sh
sudo -u hdfs /tmp/load_pos.sh
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.