Josh Rosen JoshRosen

## out.diff
diff --git a/OpenHashMap.class.asm b/OpenHashMap.class.asm
index aa713d0..b684cf3 100644
--- a/OpenHashMap.class.asm
+++ b/OpenHashMap.class.asm
@@ -1375,10 +1375,10 @@
     MAXSTACK = 1
     MAXLOCALS = 1

-  // access flags 0x1
+  // access flags 0x2

## bench.scala
def timeAndRecordAllocations(
    numWarmups: Int,
    numTrials: Int
  )(functionToBenchmark: => Unit): Unit = {

    import java.lang.management.ManagementFactory
    import com.sun.management.ThreadMXBean
    val threadMxBean = ManagementFactory.getThreadMXBean.asInstanceOf[ThreadMXBean]
    val threadId = Thread.currentThread.getId

## out.diff
diff --git a/OpenHashMap$mcD$sp.class.asm b/OpenHashMap$mcD$sp.class.asm
index 3989e91..ea49dbb 100644
--- a/OpenHashMap$mcD$sp.class.asm
+++ b/OpenHashMap$mcD$sp.class.asm
@@ -29,6 +29,54 @@ public class org/apache/spark/util/collection/OpenHashMap$mcD$sp extends org/apa
   // access flags 0x1
   public D nullValue$mcD$sp

+  // access flags 0x1019
+  public final static synthetic $anonfun$changeValue$3(Lorg/apache/spark/util/collection/OpenHashMap$mcD$sp;I)V

## janino_90.java
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=1
/* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */   private Object[] references;
/* 008 */   private scala.collection.Iterator[] inputs;
/* 009 */   private boolean range_initRange_0;
/* 010 */   private long range_nextIndex_0;

## scapegoat-to-csv-spark.py
import xml.etree.ElementTree as ET
import glob
import fnmatch
import os
import csv

# Pinned revisions of the two projects this script works with.
# NOTE(review): both values are 40-character hex strings, presumably git commit
# SHAs -- confirm against the Scapegoat and Spark repositories.
SCAPEGOAT_VERSION = 'd9392e5072e3e408dd232e6fc799e0ac1640189b'
SPARK_VERSION = '4816c2ef5e04eb2dd70bed8b99882aa0b7fe7fd7'
# Absolute path to a Spark directory on one developer's machine.
# NOTE(review): hard-coded and user-specific; anyone else running this script
# presumably needs to edit it -- confirm how it is consumed further down.
SPARK_HOME = '/Users/joshrosen/Documents/spark/'

## spark_flame_graphs.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              2 stars
            
          
                JoshRosen
                / spark_flame_graphs.md
            
            
              Created
              August 29, 2016 19:31
                — forked from kayousterhout/spark_flame_graphs.md
            
          
    Generating Flame Graphs for Apache Spark

Flame graphs are a nifty debugging tool for determining where CPU time is being spent. Using the Java Flight Recorder, you can generate them for Java processes without adding significant runtime overhead.
When are flame graphs useful?

Shivaram Venkataraman and I have found these flame recordings to be useful for diagnosing coarse-grained performance problems. We started using them at the suggestion of Josh Rosen, who quickly made one for the Spark scheduler when we were talking to him about why the scheduler caps out at a throughput of a few thousand tasks per second. Josh generated a graph similar to the one below, which illustrates that a significant amount of time is spent in serialization (if you click in the top right-hand corner and search for "serialize", you can see that 78.6% of the sampled CPU time was spent in serialization). We used this insight to spee

  
## apply-patch.sh
#!/bin/bash

apply () {
  filename=$1
  shift
  patch_args=$*

  gotSubject=no
  msg=""

## Casting structs.html
<!DOCTYPE html>
<html>
<head>
  <meta name="databricks-html-version" content="1">
<title>Casting structs - Databricks</title>

<meta charset="utf-8">
<meta name="google" content="notranslate">
<meta http-equiv="Content-Language" content="en">
<meta http-equiv="Content-Type" content="text/html; charset=UTF8">

## pom.diff
diff --git before after
index bdf27c9..88b5282 100644
--- before
+++ after
@@ -1,3253 +1,3241 @@
 [INFO] Scanning for projects...
 [WARNING]
 [WARNING] Some problems were encountered while building the effective model for org.apache.spark:spark-core_2.11:jar:2.0.0-SNAPSHOT
 [WARNING] The expression ${pom.version} is deprecated. Please use ${project.version} instead.
 [WARNING]

## gist:307c754749345e811c257681b28cd97c
[INFO] Scanning for projects...
[WARNING]
[WARNING] Some problems were encountered while building the effective model for org.apache.spark:spark-core_2.11:jar:2.0.0-SNAPSHOT
[WARNING] The expression ${pom.version} is deprecated. Please use ${project.version} instead.
[WARNING]
[WARNING] It is highly recommended to fix these problems because they threaten the stability of your build.
[WARNING]
[WARNING] For this reason, future Maven versions might no longer support building such malformed projects.
[WARNING]
[INFO] ------------------------------------------------------------------------
	diff --git a/OpenHashMap.class.asm b/OpenHashMap.class.asm
	index aa713d0..b684cf3 100644
	--- a/OpenHashMap.class.asm
	+++ b/OpenHashMap.class.asm
	@@ -1375,10 +1375,10 @@
	MAXSTACK = 1
	MAXLOCALS = 1

	- // access flags 0x1
	+ // access flags 0x2
	def timeAndRecordAllocations(
	numWarmups: Int,
	numTrials: Int
	)(functionToBenchmark: => Unit): Unit = {

	import java.lang.management.ManagementFactory
	import com.sun.management.ThreadMXBean
	val threadMxBean = ManagementFactory.getThreadMXBean.asInstanceOf[ThreadMXBean]
	val threadId = Thread.currentThread.getId
	diff --git a/OpenHashMap$mcD$sp.class.asm b/OpenHashMap$mcD$sp.class.asm
	index 3989e91..ea49dbb 100644
	--- a/OpenHashMap$mcD$sp.class.asm
	+++ b/OpenHashMap$mcD$sp.class.asm
	@@ -29,6 +29,54 @@ public class org/apache/spark/util/collection/OpenHashMap$mcD$sp extends org/apa
	// access flags 0x1
	public D nullValue$mcD$sp

	+ // access flags 0x1019
	+ public final static synthetic $anonfun$changeValue$3(Lorg/apache/spark/util/collection/OpenHashMap$mcD$sp;I)V
	/* 001 */ public Object generate(Object[] references) {
	/* 002 */ return new GeneratedIteratorForCodegenStage1(references);
	/* 003 */ }
	/* 004 */
	/* 005 */ // codegenStageId=1
	/* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator {
	/* 007 */ private Object[] references;
	/* 008 */ private scala.collection.Iterator[] inputs;
	/* 009 */ private boolean range_initRange_0;
	/* 010 */ private long range_nextIndex_0;
	import xml.etree.ElementTree as ET
	import glob
	import fnmatch
	import os
	import csv

	SCAPEGOAT_VERSION = 'd9392e5072e3e408dd232e6fc799e0ac1640189b'
	SPARK_VERSION = '4816c2ef5e04eb2dd70bed8b99882aa0b7fe7fd7'
	SPARK_HOME = '/Users/joshrosen/Documents/spark/'
	#!/bin/bash

	apply () {
	filename=$1
	shift
	patch_args=$*

	gotSubject=no
	msg=""
	<!DOCTYPE html>
	<html>
	<head>
	<meta name="databricks-html-version" content="1">
	<title>Casting structs - Databricks</title>

	<meta charset="utf-8">
	<meta name="google" content="notranslate">
	<meta http-equiv="Content-Language" content="en">
	<meta http-equiv="Content-Type" content="text/html; charset=UTF8">
	diff --git before after
	index bdf27c9..88b5282 100644
	--- before
	+++ after
	@@ -1,3253 +1,3241 @@
	[INFO] Scanning for projects...
	[WARNING]
	[WARNING] Some problems were encountered while building the effective model for org.apache.spark:spark-core_2.11:jar:2.0.0-SNAPSHOT
	[WARNING] The expression ${pom.version} is deprecated. Please use ${project.version} instead.
	[WARNING]