Last active
December 22, 2015 05:09
-
-
Save tzolov/6422047 to your computer and use it in GitHub Desktop.
Testing the multi-VMs Pivotal HD cluster
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Within your vagrant project folder: | |
git clone https://github.com/gopivotal/pivotal-samples.git | |
mv pivotal-samples/sample-data ./retail_demo | |
rm -Rf ./pivotal-samples | |
# login to the PHD master (and client) node. The ssh password: gpadmin | |
# phd1 = 10.211.55.101 | |
ssh gpadmin@10.211.55.101 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First download the demo data into your vagrant start directory under folder sample-data | |
hadoop fs -rm -r /retail_demo | |
hadoop fs -mkdir /retail_demo | |
hadoop fs -mkdir /retail_demo/categories_dim | |
hadoop fs -put /vagrant/sample-data/categories_dim.tsv.gz /retail_demo/categories_dim/ | |
hadoop fs -mkdir /retail_demo/customer_addresses_dim | |
hadoop fs -put /vagrant/sample-data/customer_addresses_dim.tsv.gz /retail_demo/customer_addresses_dim/ | |
hadoop fs -mkdir /retail_demo/customers_dim | |
hadoop fs -put /vagrant/sample-data/customers_dim.tsv.gz /retail_demo/customers_dim/ | |
hadoop fs -mkdir /retail_demo/email_addresses_dim | |
hadoop fs -put /vagrant/sample-data/email_addresses_dim.tsv.gz /retail_demo/email_addresses_dim/ | |
hadoop fs -mkdir /retail_demo/order_lineitems | |
hadoop fs -put /vagrant/sample-data/order_lineitems.tsv.gz /retail_demo/order_lineitems/ | |
hadoop fs -mkdir /retail_demo/orders | |
hadoop fs -put /vagrant/sample-data/orders.tsv.gz /retail_demo/orders/ | |
hadoop fs -mkdir /retail_demo/products_dim | |
hadoop fs -put /vagrant/sample-data/products_dim.tsv.gz /retail_demo/products_dim/ | |
hadoop fs -mkdir /retail_demo/date_dim/ | |
hadoop fs -put /vagrant/sample-data/date_dim.tsv.gz /retail_demo/date_dim/ | |
hadoop fs -mkdir /retail_demo/payment_methods/ | |
hadoop fs -put /vagrant/sample-data/payment_methods.tsv.gz /retail_demo/payment_methods/ | |
hdfs fsck /retail_demo -files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# psql -d postgres | |
DROP EXTERNAL TABLE retail_demo.categories_dim_pxf; | |
DROP EXTERNAL TABLE retail_demo.customers_dim_pxf; | |
DROP EXTERNAL TABLE retail_demo.order_lineitems_pxf; | |
DROP EXTERNAL TABLE retail_demo.orders_pxf; | |
DROP EXTERNAL TABLE retail_demo.customer_addresses_dim_pxf; | |
DROP EXTERNAL TABLE retail_demo.date_dim_pxf; | |
DROP EXTERNAL TABLE retail_demo.email_addresses_dim_pxf; | |
DROP EXTERNAL TABLE retail_demo.payment_methods_pxf; | |
DROP EXTERNAL TABLE retail_demo.products_dim_pxf; | |
CREATE EXTERNAL TABLE retail_demo.categories_dim_pxf | |
( | |
category_id integer, | |
category_name character varying(400) | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/categories_dim/categories_dim.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
CREATE EXTERNAL TABLE retail_demo.customers_dim_pxf | |
( | |
customer_id TEXT, | |
first_name TEXT, | |
last_name TEXT, | |
gender TEXT | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/customers_dim/customers_dim.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
CREATE EXTERNAL TABLE retail_demo.order_lineitems_pxf | |
( | |
order_id TEXT, | |
order_item_id TEXT, | |
product_id TEXT, | |
product_name TEXT, | |
customer_id TEXT, | |
store_id TEXT, | |
item_shipment_status_code TEXT, | |
order_datetime TEXT, | |
ship_datetime TEXT, | |
item_return_datetime TEXT, | |
item_refund_datetime TEXT, | |
product_category_id TEXT, | |
product_category_name TEXT, | |
payment_method_code TEXT, | |
tax_amount TEXT, | |
item_quantity TEXT, | |
item_price TEXT, | |
discount_amount TEXT, | |
coupon_code TEXT, | |
coupon_amount TEXT, | |
ship_address_line1 TEXT, | |
ship_address_line2 TEXT, | |
ship_address_line3 TEXT, | |
ship_address_city TEXT, | |
ship_address_state TEXT, | |
ship_address_postal_code TEXT, | |
ship_address_country TEXT, | |
ship_phone_number TEXT, | |
ship_customer_name TEXT, | |
ship_customer_email_address TEXT, | |
ordering_session_id TEXT, | |
website_url TEXT | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/order_lineitems/order_lineitems.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
CREATE EXTERNAL TABLE retail_demo.orders_pxf | |
( | |
order_id TEXT, | |
customer_id TEXT, | |
store_id TEXT, | |
order_datetime TEXT, | |
ship_completion_datetime TEXT, | |
return_datetime TEXT, | |
refund_datetime TEXT, | |
payment_method_code TEXT, | |
total_tax_amount TEXT, | |
total_paid_amount TEXT, | |
total_item_quantity TEXT, | |
total_discount_amount TEXT, | |
coupon_code TEXT, | |
coupon_amount TEXT, | |
order_canceled_flag TEXT, | |
has_returned_items_flag TEXT, | |
has_refunded_items_flag TEXT, | |
fraud_code TEXT, | |
fraud_resolution_code TEXT, | |
billing_address_line1 TEXT, | |
billing_address_line2 TEXT, | |
billing_address_line3 TEXT, | |
billing_address_city TEXT, | |
billing_address_state TEXT, | |
billing_address_postal_code TEXT, | |
billing_address_country TEXT, | |
billing_phone_number TEXT, | |
customer_name TEXT, | |
customer_email_address TEXT, | |
ordering_session_id TEXT, | |
website_url TEXT | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/orders/orders.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
CREATE EXTERNAL TABLE retail_demo.customer_addresses_dim_pxf | |
( | |
customer_address_id TEXT, | |
customer_id TEXT, | |
valid_from_timestamp TEXT, | |
valid_to_timestamp TEXT, | |
house_number TEXT, | |
street_name TEXT, | |
appt_suite_no TEXT, | |
city TEXT, | |
state_code TEXT, | |
zip_code TEXT, | |
zip_plus_four TEXT, | |
country TEXT, | |
phone_number TEXT | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/customer_addresses_dim/customer_addresses_dim.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
CREATE EXTERNAL TABLE retail_demo.date_dim_pxf | |
( | |
calendar_day date, | |
reporting_year smallint, | |
reporting_quarter smallint, | |
reporting_month smallint, | |
reporting_week smallint, | |
reporting_dow smallint | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/date_dim/date_dim.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
CREATE EXTERNAL TABLE retail_demo.email_addresses_dim_pxf | |
( | |
customer_id TEXT, | |
email_address TEXT | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/email_addresses_dim/email_addresses_dim.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
CREATE EXTERNAL TABLE retail_demo.payment_methods_pxf | |
( | |
payment_method_id smallint, | |
payment_method_code character varying(20) | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/payment_methods/payment_methods.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
CREATE EXTERNAL TABLE retail_demo.products_dim_pxf | |
( | |
product_id TEXT, | |
category_id TEXT, | |
price TEXT, | |
product_name TEXT | |
) | |
LOCATION ('pxf://phd1:50070/retail_demo/products_dim/products_dim.tsv.gz?Fragmenter=HdfsDataFragmenter&Accessor=TextFileAccessor&Resolver=TextResolver') | |
FORMAT 'TEXT' (DELIMITER = E'\t'); | |
select billing_address_postal_code, sum(total_paid_amount::float8) as total, sum(total_tax_amount::float8) as tax | |
from retail_demo.orders_pxf | |
group by billing_address_postal_code | |
order by total desc limit 10; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
orders = LOAD '/retail_demo/orders/orders.tsv.gz' | |
USING PigStorage('\t') AS ( | |
order_id : long, | |
customer_id : int, | |
store_id : int, | |
order_datetime : chararray, | |
ship_completion_datetime : chararray, | |
return_datetime : chararray, | |
refund_datetime : chararray, | |
payment_method_code : chararray, | |
total_tax_amount :double, | |
total_paid_amount : double, | |
total_item_quantity : int, | |
total_discount_amount : int, | |
coupon_code : chararray, | |
coupon_amount : int, | |
order_canceled_flag : chararray, | |
has_returned_items_flag : chararray, | |
has_refunded_items_flag : chararray, | |
fraud_code : chararray, | |
fraud_resolution_code : chararray, | |
billing_address_line1 : chararray, | |
billing_address_line2 : chararray, | |
billing_address_line3 : chararray, | |
billing_address_city : chararray, | |
billing_address_state : chararray, | |
billing_address_postal_code : int, | |
billing_address_country : chararray, | |
billing_phone_number : chararray, | |
customer_name : chararray, | |
customer_email_address :chararray, | |
ordering_session_id : int, | |
website_url : chararray | |
); | |
pcode = GROUP orders BY billing_address_postal_code; | |
revenue_counts = FOREACH pcode GENERATE group as zip, SUM(orders.total_paid_amount) as total,SUM(orders.total_tax_amount); | |
order_revenue = order revenue_counts by total DESC; | |
firstten = limit order_revenue 10; | |
dump firstten; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you messup your external table HDFS path you will see something like:
ERROR: remote component error (0): There is no pxf servlet listening on the host and port specified in the external table url (libchurl.c:694)