
@Condla
Last active March 30, 2018 11:11
An Apache Pig script that shows how to read data from Apache HBase, sort it by a column value, and store it as CSV.

Pig Examples

You can run the Pig examples below with the following commands. Note: you need Pig, Tez, HDFS, and YARN set up, and the HBase and Hive tables referenced in the scripts must already exist.
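A minimal setup sketch for the referenced tables, assuming the namespace condla and the column families that appear in the scripts below (f1 for hbase_to_csv.pig, cf1 through cf3 for hive_to_hbase.pig); adapt the names to your cluster:

```shell
# One-off setup from a node with the HBase client installed.
# Namespace/table names mirror the scripts; the column families
# are assumptions inferred from the scripts' column lists.
hbase shell <<'EOF'
create_namespace 'condla'
create 'condla:test', 'f1', 'cf1', 'cf2', 'cf3'
EOF
```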

hive_to_hbase.pig

Run:

pig -Dtez.queue.name=myQueue -x tez -useHCatalog -param "my_datetime=2018-03-30_13:05:21" -f hive_to_hbase.pig 

hbase_to_csv.pig

Run:

pig -x tez -f hbase_to_csv.pig

The script loads the HBase table, sorts it by col2, and writes the result as delimited text:

-- load rows from HBase, including the row key as the first field
test = LOAD 'hbase://condla:test'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('f1:col1 f1:col2 f1:col3', '-loadKey true')
    AS (id:bytearray, col1, col2, col3);

-- sort the relation by the value of col2
sorted_test = ORDER test BY col2;

-- store as comma-separated values in HDFS (the gist is titled CSV,
-- so a comma delimiter is used here)
STORE sorted_test INTO '/user/condla/test' USING PigStorage(',');
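The same load-sort-store step can be sketched in plain Python on a small sample (the data is made up; the Pig script performs this at cluster scale, and a comma delimiter is used here to match the CSV naming):

```python
import csv
import io

# Sample rows shaped like the relation in hbase_to_csv.pig:
# (row key, col1, col2, col3) -- the values themselves are made up.
rows = [
    ("row-1", "a", 30, "x"),
    ("row-2", "b", 10, "y"),
    ("row-3", "c", 20, "z"),
]

# Equivalent of: sorted_test = ORDER test BY col2;
sorted_rows = sorted(rows, key=lambda r: r[2])

# Equivalent of: STORE ... USING PigStorage -- delimited text output
buf = io.StringIO()
csv.writer(buf).writerows(sorted_rows)
print(buf.getvalue())
```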
hive_to_hbase.pig builds a composite row key from the Hive table's columns and copies the filtered rows into HBase:

-- load the Hive table condla.test through HCatalog
test = LOAD 'condla.test' USING org.apache.hive.hcatalog.pig.HCatLoader();

-- prepend a composite row key: $4 _ unixtime($0) _ unixtime($1) _ $5
hbase_dump = FOREACH test
    GENERATE
    CONCAT($4, '_', (chararray)ToUnixTime($0), '_', (chararray)ToUnixTime($1), '_', $5) AS row_id,
    *;

-- keep only rows at or after the datetime passed with -param my_datetime
hbase_dump_filtered = FILTER hbase_dump BY call_datetime >= ToDate('$my_datetime', 'yyyy-MM-dd_HH:mm:ss');

-- STORE is a statement and cannot be assigned to an alias;
-- the first field (row_id) becomes the HBase row key
STORE hbase_dump_filtered INTO 'hbase://condla:test'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('cf1:col1 cf1:col2 cf2:col1 cf3:col1 cf3:col2');
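The composite row key and the datetime filter can be illustrated in Python; the field names below (region and msisdn standing in for $4 and $5) are assumptions, since the Hive schema is not shown in the gist:

```python
from datetime import datetime, timezone

def row_id(start, end, region, msisdn):
    """Mirror CONCAT($4, '_', ToUnixTime($0), '_', ToUnixTime($1), '_', $5)."""
    to_unix = lambda dt: str(int(dt.replace(tzinfo=timezone.utc).timestamp()))
    return "_".join([region, to_unix(start), to_unix(end), msisdn])

# Equivalent of ToDate('$my_datetime', 'yyyy-MM-dd_HH:mm:ss') with the
# sample parameter from the run command above
cutoff = datetime.strptime("2018-03-30_13:05:21", "%Y-%m-%d_%H:%M:%S")

call_datetime = datetime(2018, 3, 30, 14, 0, 0)
key = row_id(call_datetime, call_datetime, "AT", "12345")
keep = call_datetime >= cutoff  # FILTER ... BY call_datetime >= ...
print(key, keep)
```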