Reynold Xin rxin

## ampcamp-ecnu-2013-data.sh
################################################################################
# Step 1. Download wiki traffic log.
# from
#  https://s3.amazonaws.com/ampcamp/ampcamp-ecnu-2013/wikistats/part-00095.gz
# to
#  https://s3.amazonaws.com/ampcamp/ampcamp-ecnu-2013/wikistats/part-00168.gz
# Note that 095 and 168 are both 0 bytes. The sole purpose of their existence is
# to verify the downloads.

# NOTE THAT THE FOLLOWING SCRIPT STARTS wget AS BACKGROUND PROCESSES.

## ramdisk.sh
#!/bin/bash

# From http://tech.serbinn.net/2010/shell-script-to-create-ramdisk-on-mac-os-x/
#

ARGS=2
E_BADARGS=99

if [ $# -ne $ARGS ] # correct number of arguments to the script;
then

## ByteBufferPerf.scala

/**
 * To compile:
 *  scalac -optimize ByteBufferPerf.scala
 *
 * JAVA_OPTS="-Xmx2g" scala IntArrayPerf 10
 *  49  62  48  45  48  45  48  50  47  45
 *
 * JAVA_OPTS="-Xmx2g" scala ByteBufferPerf 10
 *  479 491 484 480 484 481 477 477 472 473

## testwrite.scala
def testWrite(path: String): Long = {
  val startTime = System.currentTimeMillis()
  val out = new java.io.FileWriter(path)
  var i = 1
  val bytes = " " * (1024 * 1024)
  while (i < 1000) {
    out.write(bytes)
    i += 1
  }
  out.close

## BytecodeAnalyzer.scala
package spark.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable

import org.objectweb.asm.{ClassReader, MethodVisitor}
import org.objectweb.asm.commons.EmptyVisitor
import org.objectweb.asm.Opcodes._

## InsertPerf.scala


// 1001  381	384	384	383	384	407	404	409	407

object ArrayBufferBenchmark extends scala.testing.Benchmark {

  def run = {

    val len = 10 * 1000 * 1000
    val a = new scala.collection.mutable.ArrayBuffer[Int](len)

## update.sh
set -e
set -o pipefail

/root/spark/bin/stop-all.sh

rm -rf ~/.ivy2/local/org.spark*
rm -rf ~/.ivy2/cache/org.spark*

cd /root/spark
git checkout master

## gist:6896688

  def takeAsync(num: Int): FutureAction[Seq[T]] = {
    val promise = new CancellablePromise[Seq[T]]

    promise.run {
      val buf = new ArrayBuffer[T](num)
      val totalParts = self.partitions.length
      var partsScanned = 0
      while (buf.size < num && partsScanned < totalParts && !promise.cancelled) {
        // The number of partitions to try in this iteration. It is ok for this number to be

## gist:8910734
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *

## microbenchmark.markdown

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rxin
                / microbenchmark.markdown
            
            
              Created
              May 28, 2014 22:55
                — forked from pchiusano/microbenchmark.markdown
            
          
    I was curious about the results reported here, which claim that Scala's mutable maps are slower than Java's: http://www.infoq.com/news/2011/11/yammer-scala
In my tests, Scala's OpenHashMap equals or beats Java's HashMap:
Insertion of 100k elements (String keys), time in ms:

scala HashMap: 92.75
scala OpenHashMap: 14.03125
java HashMap: 15.78125
	################################################################################
	# Step 1. Download wiki traffic log.
	# from
	# https://s3.amazonaws.com/ampcamp/ampcamp-ecnu-2013/wikistats/part-00095.gz
	# to
	# https://s3.amazonaws.com/ampcamp/ampcamp-ecnu-2013/wikistats/part-00168.gz
	# Note that 095 and 168 are both 0 bytes. The sole purpose of their existence is
	# to verify the downloads.

	# NOTE THAT THE FOLLOWING SCRIPT STARTS wget AS BACKGROUND PROCESSES.
	#!/bin/bash

	# From http://tech.serbinn.net/2010/shell-script-to-create-ramdisk-on-mac-os-x/
	#

	ARGS=2
	E_BADARGS=99

	if [ $# -ne $ARGS ] # correct number of arguments to the script;
	then

	/**
	* To compile:
	* scalac -optimize ByteBufferPerf.scala
	*
	* JAVA_OPTS="-Xmx2g" scala IntArrayPerf 10
	* 49 62 48 45 48 45 48 50 47 45
	*
	* JAVA_OPTS="-Xmx2g" scala ByteBufferPerf 10
	* 479 491 484 480 484 481 477 477 472 473
	def testWrite(path: String): Long = {
	val startTime = System.currentTimeMillis()
	val out = new java.io.FileWriter(path)
	var i = 1
	val bytes = " " * (1024 * 1024)
	while (i < 1000) {
	out.write(bytes)
	i += 1
	}
	out.close
	package spark.util

	import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

	import scala.collection.mutable

	import org.objectweb.asm.{ClassReader, MethodVisitor}
	import org.objectweb.asm.commons.EmptyVisitor
	import org.objectweb.asm.Opcodes._


	// 1001 381 384 384 383 384 407 404 409 407

	object ArrayBufferBenchmark extends scala.testing.Benchmark {

	def run = {

	val len = 10 * 1000 * 1000
	val a = new scala.collection.mutable.ArrayBuffer[Int](len)
	set -e
	set -o pipefail

	/root/spark/bin/stop-all.sh

	rm -rf ~/.ivy2/local/org.spark*
	rm -rf ~/.ivy2/cache/org.spark*

	cd /root/spark
	git checkout master

	def takeAsync(num: Int): FutureAction[Seq[T]] = {
	val promise = new CancellablePromise[Seq[T]]

	promise.run {
	val buf = new ArrayBuffer[T](num)
	val totalParts = self.partitions.length
	var partsScanned = 0
	while (buf.size < num && partsScanned < totalParts && !promise.cancelled) {
	// The number of partitions to try in this iteration. It is ok for this number to be
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*