Shrijeet (shrijeet), Redwood City, CA
-------------------------------------------------------
T E S T S
-------------------------------------------------------
Running org.apache.hadoop.mapred.TestMRCJCFileOutputCommitter
Tests run: 3, Failures: 1, Errors: 1, Skipped: 0, Time elapsed: 1.612 sec <<< FAILURE! - in org.apache.hadoop.mapred.TestMRCJCFileOutputCommitter
testAbort(org.apache.hadoop.mapred.TestMRCJCFileOutputCommitter) Time elapsed: 0.116 sec <<< ERROR!
java.lang.NullPointerException: null
at org.apache.hadoop.mapred.TestMRCJCFileOutputCommitter.testAbort(TestMRCJCFileOutputCommitter.java:144)
testFailAbort(org.apache.hadoop.mapred.TestMRCJCFileOutputCommitter) Time elapsed: 0.071 sec <<< FAILURE!
@shrijeet
shrijeet / Pi_job_fail.java
Created November 4, 2014 21:58
MAPREDUCE-4815 Failing pi job
[shrijeet@xxx-209 ~]$ hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 5 5
Number of Maps = 5
Samples per Map = 5
Wrote input for Map #0
Wrote input for Map #1
Wrote input for Map #2
Wrote input for Map #3
Wrote input for Map #4
Starting Job
14/11/04 16:53:51 INFO client.RMProxy: Connecting to ResourceManager at yarn-rm.xxx-yyyy.example.net/172.0.0.0:8032
@shrijeet
shrijeet / Pi.java
Created November 4, 2014 23:11
Pi's cleanup method
@Override
public void cleanup(Context context) throws IOException {
  // write output to a file
  Configuration conf = context.getConfiguration();
  Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR));
  Path outFile = new Path(outDir, "reduce-out");
  FileSystem fileSys = FileSystem.get(conf);
  SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
      outFile, LongWritable.class, LongWritable.class,
      CompressionType.NONE);
  // The gist preview ends at the writer creation; the stock pi example
  // finishes by appending its accumulated (numInside, numOutside) tallies
  // as a single record and closing the writer (those fields belong to the
  // reducer and are not shown in the preview).
  writer.append(new LongWritable(numInside), new LongWritable(numOutside));
  writer.close();
}
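
For reference, the matching read side, assuming the same fileSys, conf, and outFile as in the cleanup method above. This is a sketch, not part of the gist; it mirrors how the pi example itself consumes the reduce-out file.

// Read the single (numInside, numOutside) record back out of reduce-out.
// The old-style Reader constructor pairs with the createWriter overload
// used above.
LongWritable numInside = new LongWritable();
LongWritable numOutside = new LongWritable();
SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, outFile, conf);
try {
  reader.next(numInside, numOutside);
} finally {
  reader.close();
}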
@shrijeet
shrijeet / AppSchedulable.java
Created March 23, 2015 16:27
Only one of local, rack or any is satisfied
if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0
    && localRequest != null && localRequest.getNumContainers() != 0) {
  return assignContainer(node, localRequest,
      NodeType.NODE_LOCAL, reserved);
}
if (rackLocalRequest != null && !rackLocalRequest.getRelaxLocality()) {
  continue;
}
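
The excerpt gates a NODE_LOCAL assignment on both the rack-level and node-level requests having containers outstanding, which is exactly what the gist title calls out: when only one of local, rack, or any is satisfied, the branch never fires. Purely as an illustration of the problem (not the upstream fix), a relaxed check that lets a pending node-local request through on its own would look like:

// Illustrative only: assign node-local work whenever the node-level
// request alone has outstanding containers, regardless of the rack-level
// request. Uses only names from the excerpt above.
if (localRequest != null && localRequest.getNumContainers() != 0) {
  return assignContainer(node, localRequest,
      NodeType.NODE_LOCAL, reserved);
}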
@shrijeet
shrijeet / clean_calls_regularly.patch
Created October 14, 2011 19:11
RPC timeout issue
diff --git a/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java b/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
index 2cc1b04..c08a55e 100644
--- a/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
+++ b/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
@@ -209,6 +209,7 @@ public class HBaseClient {
* socket connected to a remote address. Calls are multiplexed through this
* socket: responses may be delivered out of order. */
private class Connection extends Thread {
+ protected static final long DEFAULT_CLEAN_INTERVAL = -1; // disabled by default
private ConnectionId remoteId;
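
The preview cuts off after the first context line, but the patch name ("clean_calls_regularly.patch") and the new constant give away the approach: rather than letting RPCs block on a dead connection indefinitely, the connection sweeps its outstanding-call table on a fixed interval and fails anything older than the timeout. A rough sketch of that pattern follows, with hypothetical member names rather than the literal patch body:

// Sketch only: assumes the surrounding Connection keeps its in-flight RPCs
// in a Map<Integer, Call> named "calls", and that Call exposes a startTime
// field and a setException(IOException) method, in the style of
// HBaseClient's inner classes.
private void cleanupCalls(long cleanInterval, long callTimeout) {
  if (cleanInterval <= 0) {
    return; // an interval of -1 (the new default above) keeps cleanup disabled
  }
  long now = System.currentTimeMillis();
  synchronized (calls) {
    Iterator<Call> it = calls.values().iterator();
    while (it.hasNext()) {
      Call call = it.next();
      if (now - call.startTime > callTimeout) {
        call.setException(new java.net.SocketTimeoutException("call timed out"));
        it.remove();
      }
    }
  }
}

The Connection thread would invoke something like this periodically from its receive loop, so the disabled default leaves behavior unchanged unless a site opts in.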
@shrijeet
shrijeet / HConnectionManager.java
Created December 14, 2011 19:27
HConnectionManager throwing runtime exceptions
/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
@shrijeet
shrijeet / gist:1597560
Created January 11, 2012 23:55
org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc.newInstance NPE
select pid as pid,
sum(if(bid is not null and bid <> '', 1, 0)) as bids,
sum(1) as requests
from table
where data_date = 20120110
and (pid = 15368 or pid = 15369 or pid = 15370)
group by pid,
sum(if(bid is not null and bid <> '', 1, 0)),
sum(1)
@shrijeet
shrijeet / gist:1597563
Created January 11, 2012 23:56
Query NPE
FAILED: Hive Internal Error: java.lang.NullPointerException(null)
java.lang.NullPointerException
at org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc.newInstance(ExprNodeGenericFuncDesc.java:214)
at org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory$DefaultExprProcessor.getXpathOrFuncExprNodeDesc(TypeCheckProcFactory.java:684)
at org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory$DefaultExprProcessor.process(TypeCheckProcFactory.java:805)
at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89)
at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:88)
at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:125)
at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:102)
at org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.genExprNode(TypeCheckProcFactory.java:161)
@shrijeet
shrijeet / hive_merge_small_files.java
Created March 12, 2012 23:09
Hive merge small files bug (?) (when using HiveInputFormat and not CombineHiveInputFormat)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
index a3e40f7..7674af4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
@@ -381,7 +381,7 @@ public class MapRedTask extends ExecDriver implements Serializable {
.printInfo("Number of reduce tasks is set to 0 since there's no reduce operator");
work.setNumReduceTasks(Integer.valueOf(0));
} else {
- if (numReducersFromWork >= 0) {
+ if (numReducersFromWork > 0) {
@shrijeet
shrijeet / hive_mail.txt
Created March 12, 2012 23:37
hive merge file error description
Hive Version: Hive 0.8 (last commit SHA b581a6192b8d4c544092679d05f45b2e50d42b45 )
Hadoop version: cdh3u0
I am trying to use the Hive small-file merge feature by setting all the necessary parameters.
I am disabling CombineHiveInputFormat since my input is compressed text.
hive> set mapred.min.split.size.per.node=1000000000;
hive> set mapred.min.split.size.per.rack=1000000000;
hive> set mapred.max.split.size=1000000000;