-
example data from http://neo4j.com/docs/milestone/import-tool-basic-example.html
-
create table
$ echo "create 'actor', 'a'" | hbase shell
15/07/21 13:29:36 INFO Configuration.deprecation: hadoop.native.lib is deprecated. Instead, use io.native.lib.available
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 0.98.6-cdh5.3.0, rUnknown, Tue Dec 16 19:13:29 PST 2014
create 'actor', 'a'
0 row(s) in 2.1280 seconds
Hbase::Table - actor
$ echo "create 'movie', 'm'" | hbase shell
...
-
insert rows
$ echo "put 'actor', 'keanu', 'a:fullname', 'Keanu Reeves'" | hbase shell
$ echo "put 'actor', 'laurence', 'a:fullname', 'Laurence Fishburne'" | hbase shell
$ echo "put 'actor', 'carrieanne', 'a:fullname', 'Carrie-Anne Moss'" | hbase shell
$ echo "put 'movie', 'tt0133093', 'm:title', 'The Matrix'" | hbase shell
$ echo "put 'movie', 'tt0133093', 'm:year', 1999" | hbase shell
$ echo "put 'movie', 'tt0234215', 'm:title', 'The Matrix Reloaded'" | hbase shell
$ echo "put 'movie', 'tt0234215', 'm:year', 2003" | hbase shell
$ echo "put 'movie', 'tt0242653', 'm:title', 'The Matrix Revolutions'" | hbase shell
$ echo "put 'movie', 'tt0242653', 'm:year', 2003" | hbase shell
-
dump
-- Dump the HBase 'actor' and 'movie' tables, plus an actor x movie pairing, as comma-delimited files.

-- Load each table as (row key, map of every column in the family).
actor = LOAD 'hbase://actor'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('a:*', '-loadKey true')
    AS (id:bytearray, am:map[bytearray]);
movie = LOAD 'hbase://movie'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('m:*', '-loadKey true')
    AS (id:bytearray, mm:map[bytearray]);

-- Wrap the text columns in double quotes; the year is emitted as-is.
a_temp = FOREACH actor GENERATE id, CONCAT(CONCAT('"', am#'fullname'), '"');
m_temp = FOREACH movie GENERATE id, CONCAT(CONCAT('"', mm#'title'), '"'), mm#'year';

-- Pair every actor with every movie. After the CROSS, $0 is the actor id
-- and $2 is the movie id (fields are: actor id, name, movie id, title, year).
joined = CROSS a_temp, m_temp;
j_temp = FOREACH joined GENERATE $0, $2;

-- STORE is a statement, not a relational expression, so it must not be
-- assigned to an alias.
STORE a_temp INTO 'actor' USING PigStorage(',');
STORE m_temp INTO 'movie' USING PigStorage(',');
STORE j_temp INTO 'roles' USING PigStorage(',');
-
dump some columns
-- Template: dump selected columns of one HBase table to HDFS.
-- Replace every [bracketed placeholder] before running.

-- Load the table as (row key, map of every column in the family).
table_loaded = LOAD 'hbase://[table name]'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('[column family name]:*', '-loadKey true')
    AS (id:bytearray, m:map[bytearray]);

-- Output A: one column as-is, one with commas replaced by spaces, plus a literal.
A = FOREACH table_loaded GENERATE m#'[col n]', REPLACE(m#'[col m]', ',', ' '), '[some text if necessary]';
STORE A INTO '[hdfs directory A]' USING PigStorage('[delimiter]');

-- Output B: two columns around a literal, with empty values and duplicates removed.
-- Reads from table_loaded, the only relation this template defines.
B = FOREACH table_loaded GENERATE m#'[col x]', '["some text if necessary"]', m#'[col y]';
-- NOTE(review): $1 is the literal text field, not [col y]; if the intent is to
-- require a non-empty [col y], this should probably test $2 - confirm.
B_filtered = FILTER B BY 0 < SIZE($0) AND 0 < SIZE($1);
B_distinct = DISTINCT B_filtered;
STORE B_distinct INTO '[hdfs directory B]' USING PigStorage('[delimiter]');
-
troubleshooting
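Error "Scalar has more than one row in the output":
after a JOIN or CROSS, refer to a field with the disambiguate operator `::` (e.g. `a_temp::id`) instead of `relation.field`, which Pig treats as a scalar projection.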
-
ref
-
-
Save hyunjun/55f83bfd91e2b1e24f46 to your computer and use it in GitHub Desktop.
from hbase to graph DB
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment