Skip to content

Instantly share code, notes, and snippets.

@afonsoaugusto
Last active November 7, 2018 19:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afonsoaugusto/2c9c945bc9026d3bf653efe9d7d7553c to your computer and use it in GitHub Desktop.
Save afonsoaugusto/2c9c945bc9026d3bf653efe9d7d7553c to your computer and use it in GitHub Desktop.
# Run the purchases MapReduce job end to end and print its result locally.
# NOTE(review): `hs` is not defined in this file; it is presumably a local
# alias/function wrapping Hadoop Streaming as
# `hs <mapper> <reducer> <hdfs-input> <hdfs-output>` -- confirm on the target machine.

# Show what is currently in the HDFS home directory.
hadoop fs -ls

# Clear leftovers from earlier runs. The job below writes to `output`, and
# Hadoop refuses to start a job whose output directory already exists, so
# `output` has to be removed too (the original only removed `output_max_store`).
# Each rm prints a "No such file or directory" error when its path is absent;
# that is harmless here.
hadoop fs -rm -r myinput
hadoop fs -rm -r output_max_store
hadoop fs -rm -r output

# Upload the input data and run the job.
hadoop fs -put data/purchases.txt myinput
hs code/mapper.py code/reducer.py myinput output

# `hadoop fs -get` will not overwrite an existing local file, so drop any
# stale copy before fetching the reducer output.
rm -f data/mylocalfile.txt
hadoop fs -get output/part-00000 data/mylocalfile.txt
cat data/mylocalfile.txt
# Refresh the two forum datasets in HDFS: first remove any previous copies,
# then upload the local TSV files under the same names.
for dataset in forum_node forum_users; do
    hadoop fs -rm -r "$dataset"
done
for dataset in forum_node forum_users; do
    hadoop fs -put "$dataset.tsv" "$dataset"
done

# Start the Hive CLI; the HiveQL statements that follow in this file appear
# intended to be entered in that session.
hive
-- Prepare the `forum` database. IF NOT EXISTS / IF EXISTS keep these
-- statements safe to re-run: a plain CREATE DATABASE fails once the
-- database already exists.
CREATE DATABASE IF NOT EXISTS forum;
USE forum;

-- Drop any previous definition so the table is recreated from scratch.
DROP TABLE IF EXISTS forum_node;
-- External, tab-delimited text table over the forum node dump.
-- Every column is declared STRING, so values are stored exactly as they
-- appear in the file; any numeric or date handling must cast at query time.
-- The "skip.header.line.count" property tells Hive to ignore the first line
-- of each data file (the column-name header).
CREATE EXTERNAL TABLE IF NOT EXISTS forum_node (
    id                  STRING,
    title               STRING,
    tagnames            STRING,
    author_id           STRING,
    body                STRING,
    node_type           STRING,
    parent_id           STRING,
    abs_parent_id       STRING,
    added_at            STRING,
    score               STRING,
    state_string        STRING,
    last_edited_id      STRING,
    last_activity_by_id STRING,
    last_activity_at    STRING,
    active_revision_id  STRING,
    extra               STRING,
    extra_ref_id        STRING,
    extra_count         STRING,
    marked              STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/training/forum_node_table'
TBLPROPERTIES ("skip.header.line.count"="1");

-- Replace the table's contents with the file uploaded to HDFS earlier.
-- LOAD DATA INPATH moves the source file into the table location rather than
-- copying it, so '/user/training/forum_node' no longer exists afterwards.
LOAD DATA INPATH '/user/training/forum_node'
OVERWRITE INTO TABLE forum_node;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment