NitinKumar94/hive_query_plan

## hive_query_plan
hive> explain
select
b.subscriber_id,
b.handset,
b.connection_type,
b.agent_code,
b.gender,
b.age_in_network,
b.rate_plan,
b.caller_tune,
b.customer_type,
b.region,
b.network,
a.upload_data_volume,
a.download_data_volume,
a.value,a.time_stamp,
a.cell_site,a.latitude,
a.longitude,a.circle,
a.rate_group,
a.disconnect_reason
from
lte_data_tenmillion a
join subscriber_data b on
a.subscriber_id = b.subscriber_id ;

OK
Plan not optimized by CBO.

Vertex dependency in root stage
Map 1 <- Map 2 (BROADCAST_EDGE)

Stage-0
   Fetch Operator
      limit:-1
      Stage-1
         Map 1 vectorized
         File Output Operator [FS_20]
            compressed:false
            Statistics:Num rows: 5500000 Data size: 3374484969 Basic stats: COMPLETE Column stats: NONE
            table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
            Select Operator [OP_19]
               outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"]
               Statistics:Num rows: 5500000 Data size: 3374484969 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator [MAPJOIN_18]
               |  condition map:[{"":"Inner Join 0 to 1"}]
               |  HybridGraceHashJoin:true
               |  keys:{"Map 1":"subscriber_id (type: string)","Map 2":"subscriber_id (type: string)"}
               |  outputColumnNames:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col15","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26"]
               |  Statistics:Num rows: 5500000 Data size: 3374484969 Basic stats: COMPLETE Column stats: NONE
               |<-Map 2 [BROADCAST_EDGE] vectorized
               |  Reduce Output Operator [RS_16]
               |     key expressions:subscriber_id (type: string)
               |     Map-reduce partition columns:subscriber_id (type: string)
               |     sort order:+
               |     Statistics:Num rows: 50000 Data size: 47350000 Basic stats: COMPLETE Column stats: NONE
               |     value expressions:handset (type: string), connection_type (type: string), agent_code (type: bigint), gender (type: string), age_in_network (type: string), rate_plan (type: string), caller_tune (type: string), customer_type (type: string), region (type: string), network (type: string)
               |     Filter Operator [FIL_15]
               |        predicate:subscriber_id is not null (type: boolean)
               |        Statistics:Num rows: 50000 Data size: 47350000 Basic stats: COMPLETE Column stats: NONE
               |        TableScan [TS_1]
               |           alias:b
               |           Statistics:Num rows: 100000 Data size: 94700000 Basic stats: COMPLETE Column stats: NONE
               |<-Filter Operator [FIL_17]
                     predicate:subscriber_id is not null (type: boolean)
                     Statistics:Num rows: 5000000 Data size: 3067713542 Basic stats: COMPLETE Column stats: NONE
                     TableScan [TS_0]
                        alias:a
                        Statistics:Num rows: 10000000 Data size: 6135427084 Basic stats: COMPLETE Column stats: NONE

Time taken: 0.615 seconds, Fetched: 43 row(s)
	hive> explain
	select
	b.subscriber_id,
	b.handset,
	b.connection_type,
	b.agent_code,
	b.gender,
	b.age_in_network,
	b.rate_plan,
	b.caller_tune,
	b.customer_type,
	b.region,
	b.network,
	a.upload_data_volume,
	a.download_data_volume,
	a.value,a.time_stamp,
	a.cell_site,a.latitude,
	a.longitude,a.circle,
	a.rate_group,
	a.disconnect_reason
	from
	lte_data_tenmillion a
	join subscriber_data b on
	a.subscriber_id = b.subscriber_id ;

	OK
	Plan not optimized by CBO.

	Vertex dependency in root stage
	Map 1 <- Map 2 (BROADCAST_EDGE)

	Stage-0
	   Fetch Operator
	      limit:-1
	      Stage-1
	         Map 1 vectorized
	         File Output Operator [FS_20]
	            compressed:false
	            Statistics:Num rows: 5500000 Data size: 3374484969 Basic stats: COMPLETE Column stats: NONE
	            table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
	            Select Operator [OP_19]
	               outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"]
	               Statistics:Num rows: 5500000 Data size: 3374484969 Basic stats: COMPLETE Column stats: NONE
	               Map Join Operator [MAPJOIN_18]
	               |  condition map:[{"":"Inner Join 0 to 1"}]
	               |  HybridGraceHashJoin:true
	               |  keys:{"Map 1":"subscriber_id (type: string)","Map 2":"subscriber_id (type: string)"}
	               |  outputColumnNames:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col15","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26"]
	               |  Statistics:Num rows: 5500000 Data size: 3374484969 Basic stats: COMPLETE Column stats: NONE
	               |<-Map 2 [BROADCAST_EDGE] vectorized
	               |  Reduce Output Operator [RS_16]
	               |     key expressions:subscriber_id (type: string)
	               |     Map-reduce partition columns:subscriber_id (type: string)
	               |     sort order:+
	               |     Statistics:Num rows: 50000 Data size: 47350000 Basic stats: COMPLETE Column stats: NONE
	               |     value expressions:handset (type: string), connection_type (type: string), agent_code (type: bigint), gender (type: string), age_in_network (type: string), rate_plan (type: string), caller_tune (type: string), customer_type (type: string), region (type: string), network (type: string)
	               |     Filter Operator [FIL_15]
	               |        predicate:subscriber_id is not null (type: boolean)
	               |        Statistics:Num rows: 50000 Data size: 47350000 Basic stats: COMPLETE Column stats: NONE
	               |        TableScan [TS_1]
	               |           alias:b
	               |           Statistics:Num rows: 100000 Data size: 94700000 Basic stats: COMPLETE Column stats: NONE
	               |<-Filter Operator [FIL_17]
	                     predicate:subscriber_id is not null (type: boolean)
	                     Statistics:Num rows: 5000000 Data size: 3067713542 Basic stats: COMPLETE Column stats: NONE
	                     TableScan [TS_0]
	                        alias:a
	                        Statistics:Num rows: 10000000 Data size: 6135427084 Basic stats: COMPLETE Column stats: NONE

	Time taken: 0.615 seconds, Fetched: 43 row(s)