Last active
December 20, 2015 19:59
-
-
Save code6/6187569 to your computer and use it in GitHub Desktop.
hive11_auto_convert_join_bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
hive> SELECT | |
> `dim_pay_date`.`date` | |
> , `deal`.`dealid` | |
> FROM `orderpayment_small` `orderpayment` | |
> JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` | |
> JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` | |
> JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` | |
> JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` | |
> limit 5; | |
Total MapReduce jobs = 4 | |
Execution log at: /tmp/code6/.log | |
2013-08-09 03:00:29 Starting to launch local task to process map join; maximum memory = 4176871424 | |
2013-08-09 03:00:30 Processing rows: 1 Hashtable size: 1 Memory usage: 11638640 rate: 0.003 | |
2013-08-09 03:00:30 Dump the hashtable into file: file:/tmp/code6/hive_2013-08-09_03-00-09_684_2877820299416247063/-local-10009/HashTable-Stage-14/MapJoin-mapfile31--.hashtable | |
2013-08-09 03:00:30 Upload 1 File to: file:/tmp/code6/hive_2013-08-09_03-00-09_684_2877820299416247063/-local-10009/HashTable-Stage-14/MapJoin-mapfile31--.hashtable File size: 346 | |
2013-08-09 03:00:30 Processing rows: 1 Hashtable size: 1 Memory usage: 16043368 rate: 0.004 | |
2013-08-09 03:00:30 Dump the hashtable into file: file:/tmp/code6/hive_2013-08-09_03-00-09_684_2877820299416247063/-local-10009/HashTable-Stage-14/MapJoin-mapfile21--.hashtable | |
2013-08-09 03:00:30 Upload 1 File to: file:/tmp/code6/hive_2013-08-09_03-00-09_684_2877820299416247063/-local-10009/HashTable-Stage-14/MapJoin-mapfile21--.hashtable File size: 336 | |
2013-08-09 03:00:30 Processing rows: 1 Hashtable size: 1 Memory usage: 18386344 rate: 0.004 | |
2013-08-09 03:00:30 Dump the hashtable into file: file:/tmp/code6/hive_2013-08-09_03-00-09_684_2877820299416247063/-local-10009/HashTable-Stage-14/MapJoin-mapfile40--.hashtable | |
2013-08-09 03:00:30 Upload 1 File to: file:/tmp/code6/hive_2013-08-09_03-00-09_684_2877820299416247063/-local-10009/HashTable-Stage-14/MapJoin-mapfile40--.hashtable File size: 359 | |
2013-08-09 03:00:30 End of local task; Time Taken: 1.467 sec. | |
Execution completed successfully | |
Mapred Local Task Succeeded . Convert the Join into MapJoin | |
Mapred Local Task Succeeded . Convert the Join into MapJoin | |
Launching Job 1 out of 4 | |
Number of reduce tasks is set to 0 since there's no reduce operator | |
Execution log at: /tmp/code6/.log | |
Job running in-process (local Hadoop) | |
Hadoop job information for null: number of mappers: 0; number of reducers: 0 | |
2013-08-09 03:00:37,601 null map = 0%, reduce = 0% | |
2013-08-09 03:00:40,605 null map = 100%, reduce = 0% | |
Ended Job = job_local_0001 | |
Execution completed successfully | |
Mapred Local Task Succeeded . Convert the Join into MapJoin | |
Stage-15 is filtered out by condition resolver. | |
Stage-16 is selected by condition resolver. | |
Stage-4 is filtered out by condition resolver. | |
Execution log at: /tmp/code6/.log | |
2013-08-09 03:00:44 Starting to launch local task to process map join; maximum memory = 4176871424 | |
java.lang.ArrayIndexOutOfBoundsException: 752 | |
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:180) | |
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:138) | |
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:195) | |
at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:61) | |
at org.apache.hadoop.hive.serde2.objectinspector.DelegatedStructObjectInspector.getStructFieldData(DelegatedStructObjectInspector.java:79) | |
at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.evaluate(ExprNodeColumnEvaluator.java:98) | |
at org.apache.hadoop.hive.ql.exec.JoinUtil.computeMapJoinKeys(JoinUtil.java:160) | |
at org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.processOp(HashTableSinkOperator.java:332) | |
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:503) | |
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:847) | |
at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:90) | |
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:503) | |
at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.startForward(MapredLocalTask.java:355) | |
at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.executeFromChildJVM(MapredLocalTask.java:303) | |
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.main(ExecDriver.java:761) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) | |
at java.lang.reflect.Method.invoke(Method.java:597) | |
at org.apache.hadoop.util.RunJar.main(RunJar.java:156) | |
Execution failed with exit status: 2 | |
Obtaining error information | |
Task failed! | |
Task ID: | |
Stage-16 | |
Logs: | |
/opt/tmp/hivelog/hive.log | |
FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask | |
ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask | |
Launching Job 3 out of 4 | |
Number of reduce tasks not specified. Estimated from input data size: 1 | |
In order to change the average load for a reducer (in bytes): | |
set hive.exec.reducers.bytes.per.reducer=<number> | |
In order to limit the maximum number of reducers: | |
set hive.exec.reducers.max=<number> | |
In order to set a constant number of reducers: | |
set mapred.reduce.tasks=<number> | |
Execution log at: /tmp/code6/.log | |
Job running in-process (local Hadoop) | |
Hadoop job information for null: number of mappers: 0; number of reducers: 0 | |
2013-08-09 03:00:51,813 null map = 0%, reduce = 0% | |
2013-08-09 03:00:54,816 null map = 100%, reduce = 0% | |
Ended Job = job_local_0001 with errors | |
Error during job, obtaining debugging information... | |
Execution failed with exit status: 2 | |
Obtaining error information | |
Task failed! | |
Task ID: | |
Stage-4 | |
Logs: | |
/opt/tmp/hivelog/hive.log | |
FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ABSTRACT SYNTAX TREE: | |
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `orderpayment`) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `dim_pay_date`) (= (. (TOK_TABLE_OR_COL `dim_pay_date`) `date`) (. (TOK_TABLE_OR_COL `orderpayment`) `date`))) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `deal`) (= (. (TOK_TABLE_OR_COL `deal`) `dealid`) (. (TOK_TABLE_OR_COL `orderpayment`) `dealid`))) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `order_city`) (= (. (TOK_TABLE_OR_COL `order_city`) `cityid`) (. (TOK_TABLE_OR_COL `orderpayment`) `cityid`))) (TOK_TABREF (TOK_TABNAME `user_small`) `user`) (= (. (TOK_TABLE_OR_COL `user`) `userid`) (. (TOK_TABLE_OR_COL `orderpayment`) `userid`)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL `dim_pay_date`) `date`)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL `deal`) `dealid`))) (TOK_LIMIT 5))) | |
STAGE DEPENDENCIES: | |
Stage-17 is a root stage | |
Stage-14 depends on stages: Stage-17 | |
Stage-11 depends on stages: Stage-14 , consists of Stage-15, Stage-16, Stage-4 | |
Stage-15 has a backup stage: Stage-4 | |
Stage-9 depends on stages: Stage-15 | |
Stage-16 has a backup stage: Stage-4 | |
Stage-10 depends on stages: Stage-16 | |
Stage-4 | |
Stage-0 is a root stage | |
STAGE PLANS: | |
Stage: Stage-17 | |
Map Reduce Local Work | |
Alias -> Map Local Tables: | |
deal | |
Fetch Operator | |
limit: -1 | |
order_city | |
Fetch Operator | |
limit: -1 | |
orderpayment | |
Fetch Operator | |
limit: -1 | |
Alias -> Map Local Operator Tree: | |
deal | |
TableScan | |
alias: deal | |
HashTable Sink Operator | |
condition expressions: | |
0 {dim_pay_date._col8} {orderpayment._col3} {orderpayment._col4} | |
1 {deal.dealid} | |
handleSkewJoin: false | |
keys: | |
0 [Column[_col0]] | |
1 [Column[dealid]] | |
Position of Big Table: 0 | |
order_city | |
TableScan | |
alias: order_city | |
HashTable Sink Operator | |
condition expressions: | |
0 {deal._col14} {dim_pay_date._col1} {orderpayment._col11} | |
1 | |
handleSkewJoin: false | |
keys: | |
0 [Column[_col10]] | |
1 [Column[cityid]] | |
Position of Big Table: 0 | |
orderpayment | |
TableScan | |
alias: orderpayment | |
HashTable Sink Operator | |
condition expressions: | |
0 {orderpayment.dealid} {orderpayment.cityid} {orderpayment.userid} | |
1 {dim_pay_date.date} | |
handleSkewJoin: false | |
keys: | |
0 [Column[date]] | |
1 [Column[date]] | |
Position of Big Table: 1 | |
Stage: Stage-14 | |
Map Reduce | |
Alias -> Map Operator Tree: | |
dim_pay_date | |
TableScan | |
alias: dim_pay_date | |
Map Join Operator | |
condition map: | |
Inner Join 0 to 1 | |
condition expressions: | |
0 {orderpayment.dealid} {orderpayment.cityid} {orderpayment.userid} | |
1 {dim_pay_date.date} | |
handleSkewJoin: false | |
keys: | |
0 [Column[date]] | |
1 [Column[date]] | |
outputColumnNames: _col0, _col3, _col4, _col8 | |
Position of Big Table: 1 | |
Map Join Operator | |
condition map: | |
Inner Join 0 to 1 | |
condition expressions: | |
0 {dim_pay_date._col8} {orderpayment._col3} {orderpayment._col4} | |
1 {deal.dealid} | |
handleSkewJoin: false | |
keys: | |
0 [Column[_col0]] | |
1 [Column[dealid]] | |
outputColumnNames: _col1, _col10, _col11, _col14 | |
Position of Big Table: 0 | |
Map Join Operator | |
condition map: | |
Inner Join 0 to 1 | |
condition expressions: | |
0 {deal._col14} {dim_pay_date._col1} {orderpayment._col11} | |
1 | |
handleSkewJoin: false | |
keys: | |
0 [Column[_col10]] | |
1 [Column[cityid]] | |
outputColumnNames: _col7, _col1, _col18 | |
Position of Big Table: 0 | |
File Output Operator | |
compressed: false | |
GlobalTableId: 0 | |
table: | |
input format: org.apache.hadoop.mapred.SequenceFileInputFormat | |
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat | |
Local Work: | |
Map Reduce Local Work | |
Stage: Stage-11 | |
Conditional Operator | |
Stage: Stage-15 | |
Map Reduce Local Work | |
Alias -> Map Local Tables: | |
user | |
Fetch Operator | |
limit: -1 | |
Alias -> Map Local Operator Tree: | |
user | |
TableScan | |
alias: user | |
HashTable Sink Operator | |
condition expressions: | |
0 {dim_pay_date._col1} {deal._col7} | |
1 | |
handleSkewJoin: false | |
keys: | |
0 [Column[_col18]] | |
1 [Column[userid]] | |
Position of Big Table: 0 | |
Stage: Stage-9 | |
Map Reduce | |
Alias -> Map Operator Tree: | |
$INTNAME | |
Map Join Operator | |
condition map: | |
Inner Join 0 to 1 | |
condition expressions: | |
0 {dim_pay_date._col1} {deal._col7} | |
1 | |
handleSkewJoin: false | |
keys: | |
0 [Column[_col18]] | |
1 [Column[userid]] | |
outputColumnNames: _col1, _col7 | |
Position of Big Table: 0 | |
Select Operator | |
expressions: | |
expr: dim_pay_date._col1 | |
type: string | |
expr: deal._col7 | |
type: int | |
outputColumnNames: _col0, _col1 | |
Limit | |
File Output Operator | |
compressed: false | |
GlobalTableId: 0 | |
table: | |
input format: org.apache.hadoop.mapred.TextInputFormat | |
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | |
Local Work: | |
Map Reduce Local Work | |
Stage: Stage-16 | |
Map Reduce Local Work | |
Alias -> Map Local Tables: | |
$INTNAME | |
Fetch Operator | |
limit: -1 | |
Alias -> Map Local Operator Tree: | |
$INTNAME | |
HashTable Sink Operator | |
condition expressions: | |
0 {dim_pay_date._col1} {deal._col7} | |
1 | |
handleSkewJoin: false | |
keys: | |
0 [Column[_col18]] | |
1 [Column[userid]] | |
Position of Big Table: 1 | |
Stage: Stage-10 | |
Map Reduce | |
Alias -> Map Operator Tree: | |
user | |
TableScan | |
alias: user | |
Map Join Operator | |
condition map: | |
Inner Join 0 to 1 | |
condition expressions: | |
0 {dim_pay_date._col1} {deal._col7} | |
1 | |
handleSkewJoin: false | |
keys: | |
0 [Column[_col18]] | |
1 [Column[userid]] | |
outputColumnNames: _col1, _col7 | |
Position of Big Table: 1 | |
Select Operator | |
expressions: | |
expr: dim_pay_date._col1 | |
type: string | |
expr: deal._col7 | |
type: int | |
outputColumnNames: _col0, _col1 | |
Limit | |
File Output Operator | |
compressed: false | |
GlobalTableId: 0 | |
table: | |
input format: org.apache.hadoop.mapred.TextInputFormat | |
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | |
Local Work: | |
Map Reduce Local Work | |
Stage: Stage-4 | |
Map Reduce | |
Alias -> Map Operator Tree: | |
$INTNAME | |
Reduce Output Operator | |
key expressions: | |
expr: orderpayment._col18 | |
type: int | |
sort order: + | |
Map-reduce partition columns: | |
expr: orderpayment._col18 | |
type: int | |
tag: 0 | |
value expressions: | |
expr: deal._col7 | |
type: int | |
expr: dim_pay_date._col1 | |
type: string | |
user | |
TableScan | |
alias: user | |
Reduce Output Operator | |
key expressions: | |
expr: user.userid | |
type: int | |
sort order: + | |
Map-reduce partition columns: | |
expr: user.userid | |
type: int | |
tag: 1 | |
Reduce Operator Tree: | |
Join Operator | |
condition map: | |
Inner Join 0 to 1 | |
condition expressions: | |
0 {dim_pay_date.VALUE._col8} {deal.VALUE._col0} | |
1 | |
handleSkewJoin: false | |
outputColumnNames: _col1, _col7 | |
Select Operator | |
expressions: | |
expr: dim_pay_date._col1 | |
type: string | |
expr: deal._col7 | |
type: int | |
outputColumnNames: _col0, _col1 | |
Limit | |
File Output Operator | |
compressed: false | |
GlobalTableId: 0 | |
table: | |
input format: org.apache.hadoop.mapred.TextInputFormat | |
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | |
Stage: Stage-0 | |
Fetch Operator | |
limit: 5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Reproduction script for a Hive 0.11 auto-convert-join bug: chaining four
-- auto-converted map joins over tiny tables makes the backup/local map-join
-- task fail with ArrayIndexOutOfBoundsException in
-- LazyBinaryUtils.checkObjectByteInfo (see the captured console log above).
-- NOTE(review): the exact alias names and the tiny noconditionaltask.size
-- threshold appear to be part of the repro — do not rename or tune them.
use test;

-- Fixture source: standard Hive test data file kv1.txt (key int, val string).
create table if not exists src (`key` int, `val` string);
load data local inpath '/Users/code6/git/hive/data/files/kv1.txt' overwrite into table src;

-- Single-row "fact" table; `date`/`time` are reserved-ish words, hence backticks.
drop table if exists orderpayment_small;
create table orderpayment_small (`dealid` int, `date` string, `time` string, `cityid` int, `userid` int);
insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55, 5372613 from src limit 1;

-- Small dimension table joined last in the query below.
drop table if exists user_small;
create table user_small (`userid` int);
insert overwrite table user_small select key from src limit 100;

-- Tiny threshold forces the optimizer to auto-convert every join in the
-- query into a map join without a conditional task — the buggy code path.
set hive.auto.convert.join.noconditionaltask.size = 200;

-- Four-way join (three self-joins of orderpayment_small plus user_small);
-- expected to fail with return code 2 from MapredLocalTask on Hive 0.11.
SELECT
  `dim_pay_date`.`date`
  , `deal`.`dealid`
FROM `orderpayment_small` `orderpayment`
JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
limit 5;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Variant of the reproduction script in which the alias `dim_pay_date` is
-- shortened to `A`. Everything else is identical to the first script; the
-- only intended difference between the two runs is the alias length, which
-- the gist author is isolating as a factor in the map-join serialization bug.
-- NOTE(review): whether this variant succeeds or fails is the experiment —
-- keep the alias exactly as `A`.
use test;

-- Fixture source: standard Hive test data file kv1.txt (key int, val string).
create table if not exists src (`key` int, `val` string);
load data local inpath '/Users/code6/git/hive/data/files/kv1.txt' overwrite into table src;

-- Single-row "fact" table; `date`/`time` are reserved-ish words, hence backticks.
drop table if exists orderpayment_small;
create table orderpayment_small (`dealid` int, `date` string, `time` string, `cityid` int, `userid` int);
insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55, 5372613 from src limit 1;

-- Small dimension table joined last in the query below.
drop table if exists user_small;
create table user_small (`userid` int);
insert overwrite table user_small select key from src limit 100;

-- Same tiny threshold as script #1: force all joins to auto-convert to
-- map joins without a conditional task.
set hive.auto.convert.join.noconditionaltask.size = 200;

-- Same four-way join as script #1, with `dim_pay_date` renamed to `A`.
SELECT
  `A`.`date`
  , `deal`.`dealid`
FROM `orderpayment_small` `orderpayment`
JOIN `orderpayment_small` `A` ON `A`.`date` = `orderpayment`.`date`
JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
limit 5;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment