besquared/gist:48261

## gistfile1.txt
hive> DESCRIBE EXTENDED measurements;
OK
groups	string
value	double
measure	string
account	string
application	string
dataset	string
hour	int
span	int
options	string
Detailed Table Information:
Table(tableName:measurements,dbName:default,owner:Josh,createTime:1231957580,lastAccessTime:0,retention:0,sd:StorageDescriptor(cols:[FieldSchema(name:groups,type:string,comment:null), FieldSchema(name:value,type:double,comment:null)],location:hdfs://localhost:9000/user/hive/warehouse/measurements,inputFormat:org.apache.hadoop.mapred.TextInputFormat,outputFormat:org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat,compressed:false,numBuckets:-1,serdeInfo:SerDeInfo(name:null,serializationLib:org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe,parameters:{colelction.delim=44,mapkey.delim=58,serialization.format=org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol}),bucketCols:[],sortCols:[],parameters:{}),partitionKeys:[FieldSchema(name:measure,type:string,comment:null), FieldSchema(name:account,type:string,comment:null), FieldSchema(name:application,type:string,comment:null), FieldSchema(name:dataset,type:string,comment:null), FieldSchema(name:hour,type:int,comment:null), FieldSchema(name:span,type:int,comment:null), FieldSchema(name:options,type:string,comment:null)],parameters:{})

ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF activities) (TOK_TABREF percentiles) (= (TOK_COLREF activities actor_id) (TOK_COLREF percentiles actor_id)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB measurements (TOK_PARTSPEC (TOK_PARTVAL measure 'percentile_count') (TOK_PARTVAL account 'cUU5T7y6DmdzMJFcFt3JDe') (TOK_PARTVAL application 'test') (TOK_PARTVAL dataset 'purchases') (TOK_PARTVAL hour 341400) (TOK_PARTVAL span 1) (TOK_PARTVAL options '6b5bb627456b7aada19b014ca8c62a3158d91e8e')))) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF percentiles percentile)) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_COLREF activities actor_id)))) (TOK_WHERE (AND (AND (AND (AND (= (TOK_COLREF activities account) 'cUU5T7y6DmdzMJFcFt3JDe') (= (TOK_COLREF activities application) 'test')) (= (TOK_COLREF activities dataset) 'purchases')) (>= (TOK_COLREF activities hour) 341400)) (< (TOK_COLREF activities hour) 341401))) (TOK_GROUPBY (TOK_COLREF percentiles percentile))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-3 depends on stages: Stage-2
  Stage-0 depends on stages: Stage-3, Stage-1
  Stage-0 depends on stages: Stage-3, Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        activities
            Select Operator
              expressions:
                    expr: actor_id
                    type: string
                    expr: account
                    type: string
                    expr: application
                    type: string
                    expr: dataset
                    type: string
                    expr: hour
                    type: string
              Reduce Output Operator
                key expressions:
                      expr: 0
                      type: string
                Map-reduce partition columns:
                      expr: 0
                      type: string
                tag: 0
                value expressions:
                      expr: 0
                      type: string
                      expr: 1
                      type: string
                      expr: 2
                      type: string
                      expr: 3
                      type: string
                      expr: 4
                      type: string
        percentiles
            Select Operator
              expressions:
                    expr: actor_id
                    type: string
                    expr: percentile
                    type: int
              Reduce Output Operator
                key expressions:
                      expr: 0
                      type: string
                Map-reduce partition columns:
                      expr: 0
                      type: string
                tag: 1
                value expressions:
                      expr: 0
                      type: string
                      expr: 1
                      type: int
      Reduce Operator Tree:
        Join Operator
          condition map:
               Left Outer Join0 to 1
          condition expressions:
            0 {VALUE.0} {VALUE.1} {VALUE.2} {VALUE.3} {VALUE.4}
            1 {VALUE.0} {VALUE.1}
          Filter Operator
            predicate:
                expr: (((((1 = 'cUU5T7y6DmdzMJFcFt3JDe') and (2 = 'test')) and (3 = 'purchases')) and (4 >= 341400)) and (4 < 341401))
                type: boolean
            File Output Operator
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
                  name: binary_table

  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
        /tmp/hive-Josh/295739590/1568601541.10002
          Reduce Output Operator
            key expressions:
                  expr: 6
                  type: int
            Map-reduce partition columns:
                  expr: rand()
                  type: double
            tag: -1
            value expressions:
                  expr: 0
                  type: string
      Reduce Operator Tree:
        Group By Operator

              expr: count(VALUE.0)
          keys:
                expr: KEY.0
                type: int
          mode: partial1
          File Output Operator
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
                name: binary_table

  Stage: Stage-3
    Map Reduce
      Alias -> Map Operator Tree:
        /tmp/hive-Josh/295739590/1568601541.10001
          Reduce Output Operator
            key expressions:
                  expr: 0
                  type: int
            Map-reduce partition columns:
                  expr: 0
                  type: int
            tag: -1
            value expressions:
                  expr: 1
                  type: bigint
      Reduce Operator Tree:
        Group By Operator

              expr: count(VALUE.0)
          keys:
                expr: KEY.0
                type: int
          mode: unknown
          Select Operator
            expressions:
                  expr: 0
                  type: int
                  expr: 1
                  type: bigint
            Select Operator
              File Output Operator
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
                    serde: org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe
                    name: measurements

  Stage: Stage-0
    Move Operator
      tables:
            partition:
              measure percentile_count
              account cUU5T7y6DmdzMJFcFt3JDe
              application test
              dataset purchases
              hour 341400
              span 1
              options 6b5bb627456b7aada19b014ca8c62a3158d91e8e
            replace:
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe
                name: measurements
	hive> DESCRIBE EXTENDED measurements;
	OK
	groups string
	value double
	measure string
	account string
	application string
	dataset string
	hour int
	span int
	options string
	Detailed Table Information:
	Table(tableName:measurements,dbName:default,owner:Josh,createTime:1231957580,lastAccessTime:0,retention:0,sd:StorageDescriptor(cols:[FieldSchema(name:groups,type:string,comment:null), FieldSchema(name:value,type:double,comment:null)],location:hdfs://localhost:9000/user/hive/warehouse/measurements,inputFormat:org.apache.hadoop.mapred.TextInputFormat,outputFormat:org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat,compressed:false,numBuckets:-1,serdeInfo:SerDeInfo(name:null,serializationLib:org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe,parameters:{colelction.delim=44,mapkey.delim=58,serialization.format=org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol}),bucketCols:[],sortCols:[],parameters:{}),partitionKeys:[FieldSchema(name:measure,type:string,comment:null), FieldSchema(name:account,type:string,comment:null), FieldSchema(name:application,type:string,comment:null), FieldSchema(name:dataset,type:string,comment:null), FieldSchema(name:hour,type:int,comment:null), FieldSchema(name:span,type:int,comment:null), FieldSchema(name:options,type:string,comment:null)],parameters:{})

	ABSTRACT SYNTAX TREE:
	(TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF activities) (TOK_TABREF percentiles) (= (TOK_COLREF activities actor_id) (TOK_COLREF percentiles actor_id)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB measurements (TOK_PARTSPEC (TOK_PARTVAL measure 'percentile_count') (TOK_PARTVAL account 'cUU5T7y6DmdzMJFcFt3JDe') (TOK_PARTVAL application 'test') (TOK_PARTVAL dataset 'purchases') (TOK_PARTVAL hour 341400) (TOK_PARTVAL span 1) (TOK_PARTVAL options '6b5bb627456b7aada19b014ca8c62a3158d91e8e')))) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF percentiles percentile)) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_COLREF activities actor_id)))) (TOK_WHERE (AND (AND (AND (AND (= (TOK_COLREF activities account) 'cUU5T7y6DmdzMJFcFt3JDe') (= (TOK_COLREF activities application) 'test')) (= (TOK_COLREF activities dataset) 'purchases')) (>= (TOK_COLREF activities hour) 341400)) (< (TOK_COLREF activities hour) 341401))) (TOK_GROUPBY (TOK_COLREF percentiles percentile))))

	STAGE DEPENDENCIES:
	Stage-1 is a root stage
	Stage-2 depends on stages: Stage-1
	Stage-3 depends on stages: Stage-2
	Stage-0 depends on stages: Stage-3, Stage-1
	Stage-0 depends on stages: Stage-3, Stage-1

	STAGE PLANS:
	Stage: Stage-1
	Map Reduce
	Alias -> Map Operator Tree:
	activities
	Select Operator
	expressions:
	expr: actor_id
	type: string
	expr: account
	type: string
	expr: application
	type: string
	expr: dataset
	type: string
	expr: hour
	type: string
	Reduce Output Operator
	key expressions:
	expr: 0
	type: string
	Map-reduce partition columns:
	expr: 0
	type: string
	tag: 0
	value expressions:
	expr: 0
	type: string
	expr: 1
	type: string
	expr: 2
	type: string
	expr: 3
	type: string
	expr: 4
	type: string
	percentiles
	Select Operator
	expressions:
	expr: actor_id
	type: string
	expr: percentile
	type: int
	Reduce Output Operator
	key expressions:
	expr: 0
	type: string
	Map-reduce partition columns:
	expr: 0
	type: string
	tag: 1
	value expressions:
	expr: 0
	type: string
	expr: 1
	type: int
	Reduce Operator Tree:
	Join Operator
	condition map:
	Left Outer Join0 to 1
	condition expressions:
	0 {VALUE.0} {VALUE.1} {VALUE.2} {VALUE.3} {VALUE.4}
	1 {VALUE.0} {VALUE.1}
	Filter Operator
	predicate:
	expr: (((((1 = 'cUU5T7y6DmdzMJFcFt3JDe') and (2 = 'test')) and (3 = 'purchases')) and (4 >= 341400)) and (4 < 341401))
	type: boolean
	File Output Operator
	table:
	input format: org.apache.hadoop.mapred.SequenceFileInputFormat
	output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
	name: binary_table

	Stage: Stage-2
	Map Reduce
	Alias -> Map Operator Tree:
	/tmp/hive-Josh/295739590/1568601541.10002
	Reduce Output Operator
	key expressions:
	expr: 6
	type: int
	Map-reduce partition columns:
	expr: rand()
	type: double
	tag: -1
	value expressions:
	expr: 0
	type: string
	Reduce Operator Tree:
	Group By Operator

	expr: count(VALUE.0)
	keys:
	expr: KEY.0
	type: int
	mode: partial1
	File Output Operator
	table:
	input format: org.apache.hadoop.mapred.SequenceFileInputFormat
	output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
	name: binary_table

	Stage: Stage-3
	Map Reduce
	Alias -> Map Operator Tree:
	/tmp/hive-Josh/295739590/1568601541.10001
	Reduce Output Operator
	key expressions:
	expr: 0
	type: int
	Map-reduce partition columns:
	expr: 0
	type: int
	tag: -1
	value expressions:
	expr: 1
	type: bigint
	Reduce Operator Tree:
	Group By Operator

	expr: count(VALUE.0)
	keys:
	expr: KEY.0
	type: int
	mode: unknown
	Select Operator
	expressions:
	expr: 0
	type: int
	expr: 1
	type: bigint
	Select Operator
	File Output Operator
	table:
	input format: org.apache.hadoop.mapred.TextInputFormat
	output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
	serde: org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe
	name: measurements

	Stage: Stage-0
	Move Operator
	tables:
	partition:
	measure percentile_count
	account cUU5T7y6DmdzMJFcFt3JDe
	application test
	dataset purchases
	hour 341400
	span 1
	options 6b5bb627456b7aada19b014ca8c62a3158d91e8e
	replace:
	table:
	input format: org.apache.hadoop.mapred.TextInputFormat
	output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
	serde: org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe
	name: measurements