Reynold Xin rxin

## ramdisk.sh
#!/bin/bash

# From http://tech.serbinn.net/2010/shell-script-to-create-ramdisk-on-mac-os-x/
#

ARGS=2
E_BADARGS=99

if [ $# -ne $ARGS ] # correct number of arguments to the script;
then

## benchmark.scala
package org.apache.spark.sql.catalyst.expressions.codegen

import org.codehaus.janino.SimpleCompiler


object CodeGenBenchmark {

  def quasiquotes(): Unit = {
    import scala.reflect.runtime.{universe => ru}
    import scala.reflect.runtime.universe._

## ByteBufferPerf.scala

/**
 * To compile:
 *  scalac -optimize ByteBufferPerf.scala
 *
 * JAVA_OPTS="-Xmx2g" scala IntArrayPerf 10
 *  49  62  48  45  48  45  48  50  47  45
 *
 * JAVA_OPTS="-Xmx2g" scala ByteBufferPerf 10
 *  479 491 484 480 484 481 477 477 472 473

## BinarySearch.java
package com.databricks.unsafe.util.benchmark;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

## generated-assembly.txt
Decoding compiled method 0x00007f4d0510f9d0:
Code:
[Entry Point]
[Verified Entry Point]
[Constants]
  # {method} {0x00007f4ce9662458} 'join' '(JI)J' in 'Test'
  0x00007f4d0510fb20: call   0x00007f4d1abd5a30  ;   {runtime_call}
  0x00007f4d0510fb25: data16 data16 nop WORD PTR [rax+rax*1+0x0]
  0x00007f4d0510fb30: mov    DWORD PTR [rsp-0x14000],eax
  0x00007f4d0510fb37: push   rbp

## df.py
# Load the dataset stored at /home/rxin/ints.parquet through `sqlContext`
# (`sqlContext` is defined outside this snippet).
data = sqlContext.load("/home/rxin/ints.parquet")
# Group the rows by column "a", aggregate each group with col("a") and
# avg("num"), and call collect() on the result. The returned value is not
# bound to a name here.
# NOTE(review): `col` and `avg` are not defined in this snippet — presumably
# imported from pyspark.sql.functions; confirm against the full file.
data.groupBy("a").agg(col("a"), avg("num")).collect()

## gist:6896688

  def takeAsync(num: Int): FutureAction[Seq[T]] = {
    val promise = new CancellablePromise[Seq[T]]

    promise.run {
      val buf = new ArrayBuffer[T](num)
      val totalParts = self.partitions.length
      var partsScanned = 0
      while (buf.size < num && partsScanned < totalParts && !promise.cancelled) {
        // The number of partitions to try in this iteration. It is ok for this number to be

## update.sh
# Abort the script as soon as any command exits with a non-zero status.
set -e
# Make a pipeline fail if any command in it fails, not only the last one.
set -o pipefail

# Run the stop-all.sh script shipped under /root/spark/bin before touching
# the checkout.
# NOTE(review): presumably this shuts down the running Spark daemons — confirm.
/root/spark/bin/stop-all.sh

# Delete every entry matching org.spark* under ~/.ivy2/local and
# ~/.ivy2/cache. -f keeps rm quiet (and successful) when nothing matches.
# NOTE(review): presumably done so stale Spark artifacts are not reused by a
# later build step — confirm against the rest of the script.
rm -rf ~/.ivy2/local/org.spark*
rm -rf ~/.ivy2/cache/org.spark*

# Switch the working tree at /root/spark to the master branch. Because of
# `set -e`, a failed cd stops the script before git runs elsewhere.
cd /root/spark
git checkout master

## InsertPerf.scala


// 1001  381	384	384	383	384	407	404	409	407

object ArrayBufferBenchmark extends scala.testing.Benchmark {

  def run = {

    val len = 10 * 1000 * 1000
    val a = new scala.collection.mutable.ArrayBuffer[Int](len)

## BytecodeAnalyzer.scala
package spark.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable

import org.objectweb.asm.{ClassReader, MethodVisitor}
import org.objectweb.asm.commons.EmptyVisitor
import org.objectweb.asm.Opcodes._
	#!/bin/bash

	# From http://tech.serbinn.net/2010/shell-script-to-create-ramdisk-on-mac-os-x/
	#

	ARGS=2
	E_BADARGS=99

	if [ $# -ne $ARGS ] # correct number of arguments to the script;
	then
	package org.apache.spark.sql.catalyst.expressions.codegen

	import org.codehaus.janino.SimpleCompiler


	object CodeGenBenchmark {

	def quasiquotes(): Unit = {
	import scala.reflect.runtime.{universe => ru}
	import scala.reflect.runtime.universe._

	/**
	* To compile:
	* scalac -optimize ByteBufferPerf.scala
	*
	* JAVA_OPTS="-Xmx2g" scala IntArrayPerf 10
	* 49 62 48 45 48 45 48 50 47 45
	*
	* JAVA_OPTS="-Xmx2g" scala ByteBufferPerf 10
	* 479 491 484 480 484 481 477 477 472 473
	package com.databricks.unsafe.util.benchmark;

	import org.openjdk.jmh.annotations.Benchmark;
	import org.openjdk.jmh.annotations.Param;
	import org.openjdk.jmh.annotations.Scope;
	import org.openjdk.jmh.annotations.State;
	import org.openjdk.jmh.runner.Runner;
	import org.openjdk.jmh.runner.RunnerException;
	import org.openjdk.jmh.runner.options.Options;
	import org.openjdk.jmh.runner.options.OptionsBuilder;
	Decoding compiled method 0x00007f4d0510f9d0:
	Code:
	[Entry Point]
	[Verified Entry Point]
	[Constants]
	# {method} {0x00007f4ce9662458} 'join' '(JI)J' in 'Test'
	0x00007f4d0510fb20: call 0x00007f4d1abd5a30 ; {runtime_call}
	0x00007f4d0510fb25: data16 data16 nop WORD PTR [rax+rax*1+0x0]
	0x00007f4d0510fb30: mov DWORD PTR [rsp-0x14000],eax
	0x00007f4d0510fb37: push rbp
	data = sqlContext.load("/home/rxin/ints.parquet")
	data.groupBy("a").agg(col("a"), avg("num")).collect()

	def takeAsync(num: Int): FutureAction[Seq[T]] = {
	val promise = new CancellablePromise[Seq[T]]

	promise.run {
	val buf = new ArrayBuffer[T](num)
	val totalParts = self.partitions.length
	var partsScanned = 0
	while (buf.size < num && partsScanned < totalParts && !promise.cancelled) {
	// The number of partitions to try in this iteration. It is ok for this number to be
	set -e
	set -o pipefail

	/root/spark/bin/stop-all.sh

	rm -rf ~/.ivy2/local/org.spark*
	rm -rf ~/.ivy2/cache/org.spark*

	cd /root/spark
	git checkout master


	// 1001 381 384 384 383 384 407 404 409 407

	object ArrayBufferBenchmark extends scala.testing.Benchmark {

	def run = {

	val len = 10 * 1000 * 1000
	val a = new scala.collection.mutable.ArrayBuffer[Int](len)
	package spark.util

	import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

	import scala.collection.mutable

	import org.objectweb.asm.{ClassReader, MethodVisitor}
	import org.objectweb.asm.commons.EmptyVisitor
	import org.objectweb.asm.Opcodes._