hive> CREATE TABLE thanooj.docs (line STRING);
OK
Time taken: 0.06 seconds
hive> LOAD DATA LOCAL INPATH '/home/ubuntu/input/abc.txt' OVERWRITE INTO TABLE THANOOJ.docs;
Loading data to table thanooj.docs
Table thanooj.docs stats: [numFiles=1, numRows=0, totalSize=57, rawDataSize=0]
OK
Time taken: 0.161 seconds
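LOAD DATA LOCAL INPATH copies the local file into the table's warehouse directory (it shows up later under /user/hive/warehouse/thanooj.db/docs/abc.txt), and OVERWRITE replaces any files already there. If the data already lives in HDFS and should stay at its original location, an EXTERNAL table is the usual alternative; a minimal sketch, with a hypothetical HDFS path:

CREATE EXTERNAL TABLE thanooj.docs_ext (line STRING)
LOCATION '/user/ubuntu/input/docs'; -- hypothetical path, not part of the session above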
hive> select * from thanooj.docs;
OK
to be or not to be
to be or not to be
to be or not to be
Time taken: 0.057 seconds, Fetched: 3 row(s)
hive> SELECT split(line, ' ') AS word FROM docs;
OK
["to","be","or","not","to","be"]
["to","be","or","not","to","be"]
["to","be","or","not","to","be"]
Time taken: 0.069 seconds, Fetched: 3 row(s)
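split() takes a Java regular expression rather than a literal separator, and returns an array<string>, which is why each row comes back as a bracketed list. Splitting on a single literal space would produce empty tokens if a line ever contained consecutive spaces; splitting on a run of whitespace avoids that (a sketch, not from the original session):

SELECT split(line, '\\s+') AS words FROM thanooj.docs;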
hive> SELECT explode(split(line, ' ')) AS word FROM docs;
OK
to
be
or
not
to
be
to
be
or
not
to
be
to
be
or
not
to
be
Time taken: 0.079 seconds, Fetched: 18 row(s)
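explode() is a table-generating function (UDTF), which is why it can stand alone in the SELECT list here but cannot be mixed with ordinary columns. To keep other columns alongside the exploded words, Hive's LATERAL VIEW syntax is the standard route; a minimal sketch against the same table:

SELECT w.word
FROM thanooj.docs
LATERAL VIEW explode(split(line, ' ')) w AS word;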
hive> CREATE TABLE thanooj.word_counts AS SELECT word, count(word) AS count FROM (SELECT explode(split(line, ' ')) AS word FROM thanooj.docs) w GROUP BY word ORDER BY word;
Query ID = ubuntu_20160111174054_e5685e76-c6f3-4bcf-93f0-5f566e473654
Total jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1452510713608_0011, Tracking URL = http://ubuntu:8088/proxy/application_1452510713608_0011/
Kill Command = /usr/local/hadoop2/bin/hadoop job -kill job_1452510713608_0011
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-01-11 17:41:00,586 Stage-1 map = 0%, reduce = 0%
2016-01-11 17:41:07,125 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.12 sec
2016-01-11 17:41:14,613 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 1.93 sec
MapReduce Total cumulative CPU time: 1 seconds 930 msec
Ended Job = job_1452510713608_0011
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1452510713608_0012, Tracking URL = http://ubuntu:8088/proxy/application_1452510713608_0012/
Kill Command = /usr/local/hadoop2/bin/hadoop job -kill job_1452510713608_0012
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2016-01-11 17:41:25,553 Stage-2 map = 0%, reduce = 0%
2016-01-11 17:41:31,045 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.67 sec
2016-01-11 17:41:37,431 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 1.52 sec
MapReduce Total cumulative CPU time: 1 seconds 520 msec
Ended Job = job_1452510713608_0012
Moving data to: hdfs://localhost:54310/user/hive/warehouse/thanooj.db/word_counts
Table thanooj.word_counts stats: [numFiles=1, numRows=4, totalSize=21, rawDataSize=17]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 1.93 sec HDFS Read: 7200 HDFS Write: 181 SUCCESS
Stage-Stage-2: Map: 1 Reduce: 1 Cumulative CPU: 1.52 sec HDFS Read: 4516 HDFS Write: 96 SUCCESS
Total MapReduce CPU Time Spent: 3 seconds 450 msec
OK
Time taken: 44.291 seconds
hive> select * from thanooj.word_counts;
OK
be 6
not 3
or 3
to 6
Time taken: 0.068 seconds, Fetched: 4 row(s)
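Note that the CTAS above ran as two chained MapReduce jobs, one for the GROUP BY aggregation and one for the final ORDER BY, which matches the Stage-1/Stage-2 breakdown in the log. Since word_counts is now an ordinary managed table, it can be queried like any other; for example, ranking words by frequency (a follow-up sketch, not part of the original session; some Hive configurations require backticks around count since it collides with the function name):

SELECT word, `count` FROM thanooj.word_counts ORDER BY `count` DESC;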
hive>
ubuntu@ubuntu:~/input$ hadoop fs -ls
ubuntu@ubuntu:~/input$ hadoop fs -ls /
Found 2 items
drwx-wx-wx - ubuntu supergroup 0 2016-01-11 17:22 /tmp
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:13 /user
ubuntu@ubuntu:~/input$ hadoop fs -ls /user
Found 1 items
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:13 /user/hive
ubuntu@ubuntu:~/input$ hadoop fs -mkdir /user/ubuntu
ubuntu@ubuntu:~/input$ hadoop fs -ls
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive
Found 1 items
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:14 /user/hive/warehouse
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive/warehouse
Found 2 items
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:13 /user/hive/warehouse/docs
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:41 /user/hive/warehouse/thanooj.db
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive/warehouse/thanooj.db
Found 2 items
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:28 /user/hive/warehouse/thanooj.db/docs
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:41 /user/hive/warehouse/thanooj.db/word_counts
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive/warehouse/thanooj.db/docs
Found 1 items
-rwxr-xr-x 1 ubuntu supergroup 57 2016-01-11 17:28 /user/hive/warehouse/thanooj.db/docs/abc.txt
ubuntu@ubuntu:~/input$ hadoop fs -cat /user/hive/warehouse/thanooj.db/docs/abc.txt
to be or not to be
to be or not to be
to be or not to be
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive/warehouse/thanooj.db/word_counts
Found 1 items
-rwxr-xr-x 1 ubuntu supergroup 21 2016-01-11 17:41 /user/hive/warehouse/thanooj.db/word_counts/000000_0
ubuntu@ubuntu:~/input$ hadoop fs -cat /user/hive/warehouse/thanooj.db/word_counts/000000_0
16/01/11 17:57:59 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
be6
not3
or3
to6
ubuntu@ubuntu:~/input$
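The words and counts in 000000_0 look fused together (be6, not3, ...) because CREATE TABLE ... AS SELECT used Hive's default field delimiter, the non-printing control character \001 (Ctrl-A), which hadoop fs -cat renders as nothing. If a human-readable output file is wanted, the delimiter can be set in the CTAS itself; a sketch, assuming the same source table:

CREATE TABLE thanooj.word_counts_tsv
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
AS SELECT word, count(word) AS count
FROM (SELECT explode(split(line, ' ')) AS word FROM thanooj.docs) w
GROUP BY word ORDER BY word;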
NOTE

In both examples, the files were tokenized into words using the simplest possible approach: splitting on whitespace boundaries. This approach doesn't properly handle punctuation, and it doesn't recognize that singular and plural forms are the same word, etc. However, it's good enough for our purposes here. The virtue of the Java API is the ability to customize and fine-tune every detail of an algorithm's implementation. Most of the time, though, you don't need that level of control, and managing all of those details slows you down considerably.
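Within Hive itself, the tokenization can be made somewhat more robust without dropping to the Java API, for example by lowercasing each line and stripping punctuation before splitting. A sketch, still ignoring stemming (matching singular and plural forms would need a custom UDF):

SELECT word
FROM (
  SELECT explode(split(regexp_replace(lower(line), '[^a-z\\s]', ''), '\\s+')) AS word
  FROM thanooj.docs
) t
WHERE word != ''; -- drop empty tokens left by leading/trailing whitespace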
