Cheng Su c21

## stack trace of example in SPARK-34796
17:53:05.196 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

17:53:14.001 ERROR org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 54, Column 8: Expression "_limit_counter_1" is not an rvalue
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 54, Column 8: Expression "_limit_counter_1" is not an rvalue
	at org.codehaus.janino.UnitCompiler.compileError(UnitCompiler.java:12021)
	at org.codehaus.janino.UnitCompiler.toRvalueOrCompileException(UnitCompiler.java:7575)
	at org.codehaus.janino.UnitCompiler.getConstantValue2(UnitCompiler.java:5766)
	at org.codehaus.janino.UnitCompiler.access$10700(UnitCompiler.java:226)
	at org.codehaus.janino.UnitCompiler$18$1.visitAmbiguousName(UnitCompiler.java:5717)
	at org.codehaus.janino.Java$AmbiguousName.accept(Java.java:4429)

## DataSourceReadBenchmark-results.txt
================================================================================================
SQL Single Numeric Column Scan
================================================================================================

OpenJDK 64-Bit Server VM 1.8.0_282-b08 on Linux 4.14.225-169.362.amzn2.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
SQL Single TINYINT Column Scan:           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
SQL CSV                                           21300          21444         204          0.7        1354.2       1.0X
SQL Json                                          11436          11521         119          1.4         727.1       1.9X

## OrcReadBenchmark-results.txt
================================================================================================
SQL Single Numeric Column Scan
================================================================================================

OpenJDK 64-Bit Server VM 1.8.0_282-b08 on Linux 4.14.225-169.362.amzn2.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
SQL Single TINYINT Column Scan:           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Native ORC MR                                      1816           1844          40          8.7         115.4       1.0X
Native ORC Vectorized                               316            345          45         49.8          20.1       5.7X

## gist:d861bf7a1cece929a604f4bd77f3dc72
== Subtree 2 / 2 (maxMethodCodeSize:241; maxConstantPoolSize:265(0.40% used); numInnerClasses:1) ==
*(2) HashAggregate(keys=[key#57], functions=[avg(value#58)], output=[key#57, avg(value)#60])
+- Exchange hashpartitioning(key#57, 5), ENSURE_REQUIREMENTS, [id=#65]
   +- *(1) HashAggregate(keys=[key#57], functions=[partial_avg(value#58)], output=[key#57, sum#65, count#66L])
      +- *(1) ColumnarToRow
         +- FileScan parquet default.agg1[key#57,value#58] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/y5/hnsw8mz93vs57ngcd30y6y9c0000gn/T/warehous..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:int>

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage2(references);

## gist:d0f704c0a33c24ec05387ff4df438bff
== Subtree 2 / 2 (maxMethodCodeSize:241; maxConstantPoolSize:269(0.41% used); numInnerClasses:1) ==
*(2) HashAggregateWithControlledFallback ArrayBuffer(key#57) List(avg(value#58)) List(key#57, avg(value#58)#59 AS avg(value)#60) fallbackStartsAt=(2,3)
+- Exchange hashpartitioning(key#57, 5), ENSURE_REQUIREMENTS, [id=#65]
   +- *(1) HashAggregateWithControlledFallback ArrayBuffer(key#57) List(partial_avg(value#58)) ArrayBuffer(key#57, sum#65, count#66L) fallbackStartsAt=(2,3)
      +- *(1) ColumnarToRow
         +- FileScan parquet default.agg1[key#57,value#58] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/y5/hnsw8mz93vs57ngcd30y6y9c0000gn/T/warehous..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:int>

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage2(references);

## gist:5a9d7abb915147832f72c9c1a9047ff4
testFallbackStartsAt = Some(2, 3)

val bitMaxCapacity = testFallbackStartsAt match {
  case Some(c1, _) => log2(c1)
  case _ => sqlContext.conf.fastHashAggregateRowMaxCapacityBit
}

// 1st level hash map
agg_buffer = fastHashMap.findOrInsert(key);

## gist:20c10cc8e2c7e561aafbe9b8da055242
== Subtree 2 / 2 (maxMethodCodeSize:248; maxConstantPoolSize:279(0.43% used); numInnerClasses:2) ==
*(2) HashAggregate(keys=[key#57], functions=[avg(value#58)], output=[key#57, avg(value)#60])
+- Exchange hashpartitioning(key#57, 5), ENSURE_REQUIREMENTS, [id=#65]
   +- *(1) HashAggregate(keys=[key#57], functions=[partial_avg(value#58)], output=[key#57, sum#65, count#66L])
      +- *(1) ColumnarToRow
         +- FileScan parquet default.agg1[key#57,value#58] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/y5/hnsw8mz93vs57ngcd30y6y9c0000gn/T/warehous..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:int>

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage2(references);

## gist:dabf176cbc18a5e2138bc0a29e81c878
== Subtree 2 / 2 (maxMethodCodeSize:248; maxConstantPoolSize:282(0.43% used); numInnerClasses:2) ==
*(2) HashAggregateWithControlledFallback ArrayBuffer(key#57) List(avg(value#58)) List(key#57, avg(value#58)#59 AS avg(value)#60) fallbackStartsAt=(2,3)
+- Exchange hashpartitioning(key#57, 5), ENSURE_REQUIREMENTS, [id=#65]
   +- *(1) HashAggregateWithControlledFallback ArrayBuffer(key#57) List(partial_avg(value#58)) ArrayBuffer(key#57, sum#65, count#66L) fallbackStartsAt=(2,3)
      +- *(1) ColumnarToRow
         +- FileScan parquet default.agg1[key#57,value#58] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/y5/hnsw8mz93vs57ngcd30y6y9c0000gn/T/warehous..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:int>

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage2(references);

## gist:196166411d5d0406d9a76b37be889194
23:03:27.182 ERROR org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 99, Column 86: Statement is unreachable
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 99, Column 86: Statement is unreachable
	at org.codehaus.janino.UnitCompiler.compileError(UnitCompiler.java:12021)
	at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1570)
	at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3420)
	at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1362)
	at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1335)
	at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:807)
	at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:975)
	at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:226)

## gist:873775bcd08583105b289e67221f6e17
Found 8 WholeStageCodegen subtrees.
== Subtree 1 / 8 (maxMethodCodeSize:282; maxConstantPoolSize:184(0.28% used); numInnerClasses:0) ==
*(1) Project [id#8L AS k3#10L]
+- *(1) Range (0, 6, step=1, splits=2)

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
	17:53:05.196 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

	17:53:14.001 ERROR org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 54, Column 8: Expression "_limit_counter_1" is not an rvalue
	org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 54, Column 8: Expression "_limit_counter_1" is not an rvalue
	at org.codehaus.janino.UnitCompiler.compileError(UnitCompiler.java:12021)
	at org.codehaus.janino.UnitCompiler.toRvalueOrCompileException(UnitCompiler.java:7575)
	at org.codehaus.janino.UnitCompiler.getConstantValue2(UnitCompiler.java:5766)
	at org.codehaus.janino.UnitCompiler.access$10700(UnitCompiler.java:226)
	at org.codehaus.janino.UnitCompiler$18$1.visitAmbiguousName(UnitCompiler.java:5717)
	at org.codehaus.janino.Java$AmbiguousName.accept(Java.java:4429)
	================================================================================================
	SQL Single Numeric Column Scan
	================================================================================================

	OpenJDK 64-Bit Server VM 1.8.0_282-b08 on Linux 4.14.225-169.362.amzn2.x86_64
	Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
	SQL Single TINYINT Column Scan:           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
	------------------------------------------------------------------------------------------------------------------------
	SQL CSV                                           21300          21444         204          0.7        1354.2       1.0X
	SQL Json                                          11436          11521         119          1.4         727.1       1.9X
	== Subtree 2 / 2 (maxMethodCodeSize:241; maxConstantPoolSize:265(0.40% used); numInnerClasses:1) ==
	*(2) HashAggregate(keys=[key#57], functions=[avg(value#58)], output=[key#57, avg(value)#60])
	+- Exchange hashpartitioning(key#57, 5), ENSURE_REQUIREMENTS, [id=#65]
	   +- *(1) HashAggregate(keys=[key#57], functions=[partial_avg(value#58)], output=[key#57, sum#65, count#66L])
	      +- *(1) ColumnarToRow
	         +- FileScan parquet default.agg1[key#57,value#58] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/y5/hnsw8mz93vs57ngcd30y6y9c0000gn/T/warehous..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:int>

	Generated code:
	/* 001 */ public Object generate(Object[] references) {
	/* 002 */   return new GeneratedIteratorForCodegenStage2(references);
	== Subtree 2 / 2 (maxMethodCodeSize:241; maxConstantPoolSize:269(0.41% used); numInnerClasses:1) ==
	*(2) HashAggregateWithControlledFallback ArrayBuffer(key#57) List(avg(value#58)) List(key#57, avg(value#58)#59 AS avg(value)#60) fallbackStartsAt=(2,3)
	+- Exchange hashpartitioning(key#57, 5), ENSURE_REQUIREMENTS, [id=#65]
	   +- *(1) HashAggregateWithControlledFallback ArrayBuffer(key#57) List(partial_avg(value#58)) ArrayBuffer(key#57, sum#65, count#66L) fallbackStartsAt=(2,3)
	      +- *(1) ColumnarToRow
	         +- FileScan parquet default.agg1[key#57,value#58] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/y5/hnsw8mz93vs57ngcd30y6y9c0000gn/T/warehous..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:int>

	Generated code:
	/* 001 */ public Object generate(Object[] references) {
	/* 002 */   return new GeneratedIteratorForCodegenStage2(references);
	testFallbackStartsAt = Some(2, 3)

	val bitMaxCapacity = testFallbackStartsAt match {
	  case Some(c1, _) => log2(c1)
	  case _ => sqlContext.conf.fastHashAggregateRowMaxCapacityBit
	}

	// 1st level hash map
	agg_buffer = fastHashMap.findOrInsert(key);
	== Subtree 2 / 2 (maxMethodCodeSize:248; maxConstantPoolSize:279(0.43% used); numInnerClasses:2) ==
	*(2) HashAggregate(keys=[key#57], functions=[avg(value#58)], output=[key#57, avg(value)#60])
	+- Exchange hashpartitioning(key#57, 5), ENSURE_REQUIREMENTS, [id=#65]
	   +- *(1) HashAggregate(keys=[key#57], functions=[partial_avg(value#58)], output=[key#57, sum#65, count#66L])
	      +- *(1) ColumnarToRow
	         +- FileScan parquet default.agg1[key#57,value#58] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/y5/hnsw8mz93vs57ngcd30y6y9c0000gn/T/warehous..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:int>

	Generated code:
	/* 001 */ public Object generate(Object[] references) {
	/* 002 */   return new GeneratedIteratorForCodegenStage2(references);
	23:03:27.182 ERROR org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 99, Column 86: Statement is unreachable
	org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 99, Column 86: Statement is unreachable
	at org.codehaus.janino.UnitCompiler.compileError(UnitCompiler.java:12021)
	at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1570)
	at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3420)
	at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1362)
	at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1335)
	at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:807)
	at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:975)
	at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:226)
	Found 8 WholeStageCodegen subtrees.
	== Subtree 1 / 8 (maxMethodCodeSize:282; maxConstantPoolSize:184(0.28% used); numInnerClasses:0) ==
	*(1) Project [id#8L AS k3#10L]
	+- *(1) Range (0, 6, step=1, splits=2)

	Generated code:
	/* 001 */ public Object generate(Object[] references) {
	/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
	/* 003 */ }
	/* 004 */