Skip to content

Instantly share code, notes, and snippets.

@cloud-fan
Last active January 17, 2016 06:04
Show Gist options
  • Save cloud-fan/fa77713ccebf0823b2ab to your computer and use it in GitHub Desktop.
Save cloud-fan/fa77713ccebf0823b2ab to your computer and use it in GitHub Desktop.
/**
 * Micro-benchmark comparing `oldHashCode()` and `newHashCode()` on `UnsafeArrayData`.
 *
 * A fixed set of randomly generated integer arrays is built once up front; each benchmark
 * case then hashes every array `iters` times.
 */
object ArrayHashBenchmark {

  /**
   * Builds the input arrays, registers the two cases and runs the benchmark.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val iters = 1024 * 40
    val numArrays = 1024

    val schema = ArrayType(IntegerType)
    val generator = RandomDataGenerator.forType(schema, nullable = false).get
    val encoder = ExpressionEncoder[Seq[Int]]

    // Encode each generated Seq[Int] into a row and take column 0 as the array under test.
    val arrays = (1 to numArrays).map { _ =>
      encoder.toRow(generator().asInstanceOf[Seq[Int]]).getArray(0).asInstanceOf[UnsafeArrayData]
    }.toArray

    // Each timed run hashes every array `iters` times.
    val benchmark = new Benchmark("hashCode for unsafe array", iters * numArrays)

    // The measured loops are kept as plain `while` loops with the hash call written inline,
    // so that nothing but the hash call and the loop bookkeeping sits in the timed path.
    // NOTE(review): `sum` is never read after the loop; if timings look implausibly fast,
    // consider publishing it to a sink so the JIT cannot discard the hash calls.
    benchmark.addCase("old version") { _ =>
      for (_ <- 1 to iters) {
        var i = 0
        var sum = 0
        while (i < arrays.length) {
          sum += arrays(i).oldHashCode()
          i += 1
        }
      }
    }

    benchmark.addCase("new version") { _ =>
      for (_ <- 1 to iters) {
        var i = 0
        var sum = 0
        while (i < arrays.length) {
          sum += arrays(i).newHashCode()
          i += 1
        }
      }
    }

    benchmark.run()
  }
}
/**
 * Benchmark comparing two ways of hashing a row:
 *
 *  - "normal hash": `hashCode()` of the row returned by a generated mutable projection;
 *  - "hash expr": a `Murmur3Hash` expression over all columns, evaluated through an
 *    `UnsafeProjection` and read back as an int.
 */
object HashBenchmark {

  /**
   * Generates random rows for `schema` and benchmarks both hashing strategies over them.
   *
   * Each timed run makes `iters` full passes over the generated rows, so the run time
   * grows with `iters * numRows`.
   *
   * @param name   label appended to "Hash For " in the benchmark title
   * @param schema schema of the randomly generated input rows
   * @param iters  number of passes over the generated rows per timed run
   */
  def test(name: String, schema: StructType, iters: Int): Unit = {
    // Single source of truth for the row count: used for data generation, for the loop
    // bounds and for the value count reported to Benchmark.
    val numRows = 10000

    val generator = RandomDataGenerator.forType(schema, nullable = false).get
    val encoder = RowEncoder(schema)
    // NOTE(review): the copy() presumably guards against the encoder reusing its output
    // row between calls - confirm against RowEncoder.
    val rows = (1 to numRows).map { _ =>
      encoder.toRow(generator().asInstanceOf[Row]).copy()
    }.toArray

    val attrs = schema.toAttributes
    // generate(...) returns a factory; the trailing () instantiates the projection.
    val p1 = GenerateMutableProjection.generate(attrs, attrs)()
    val p2 = UnsafeProjection.create(new Murmur3Hash(attrs) :: Nil, attrs)

    // Widen to Long before multiplying: with iters = 1024 * 400 the product no longer
    // fits in an Int and would wrap around to a negative value count.
    val benchmark = new Benchmark("Hash For " + name, iters.toLong * numRows)

    benchmark.addCase("normal hash") { _: Int =>
      var pass = 0
      while (pass < iters) {
        var sum = 0
        var i = 0
        while (i < numRows) {
          sum += p1(rows(i)).hashCode()
          i += 1
        }
        pass += 1
      }
    }

    benchmark.addCase("hash expr") { _: Int =>
      var pass = 0
      while (pass < iters) {
        var sum = 0
        var i = 0
        while (i < numRows) {
          sum += p2(rows(i)).getInt(0)
          i += 1
        }
        pass += 1
      }
    }

    benchmark.run()
  }

  /**
   * Runs the benchmark for four schemas: a single int column, a mix of atomic types,
   * array-typed columns and map-typed columns.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val simple = new StructType().add("i", IntegerType)
    test("simple", simple, 1024 * 400)

    val normal = new StructType()
      .add("null", NullType)
      .add("boolean", BooleanType)
      .add("byte", ByteType)
      .add("short", ShortType)
      .add("int", IntegerType)
      .add("long", LongType)
      .add("float", FloatType)
      .add("double", DoubleType)
      .add("bigDecimal", DecimalType.SYSTEM_DEFAULT)
      .add("smallDecimal", DecimalType.USER_DEFAULT)
      .add("string", StringType)
      .add("binary", BinaryType)
      .add("date", DateType)
      .add("timestamp", TimestampType)
    test("normal", normal, 1024 * 40)

    val structOfInt = new StructType().add("i", IntegerType)
    val arrayOfInt = ArrayType(IntegerType)
    val array = new StructType()
      .add("array", arrayOfInt)
      .add("arrayOfArray", ArrayType(arrayOfInt))
      .add("arrayOfStruct", ArrayType(structOfInt))
    test("array", array, 1024 * 40)

    val mapOfInt = MapType(IntegerType, IntegerType)
    val map = new StructType()
      .add("map", mapOfInt)
      .add("mapOfMap", MapType(IntegerType, mapOfInt))
      .add("mapOfStruct", MapType(structOfInt, structOfInt))
    test("map", map, 1024 * 40)
  }
}
/**
 * Benchmark comparing two safe-projection variants obtained from
 * `GenerateSafeProjection.test` with `old = true` and `old = false`.
 */
object SafeProjectionBenchmark {

  /**
   * Generates random rows with six numeric columns, then times both projections over them.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val iters = 1024 * 40
    val numRows = 1024

    val schema = new StructType()
      .add("byte", ByteType)
      .add("short", ShortType)
      .add("int", IntegerType)
      .add("long", LongType)
      .add("float", FloatType)
      .add("double", DoubleType)

    val generator = RandomDataGenerator.forType(schema, nullable = false).get
    val encoder = RowEncoder(schema)
    val rows = (1 to numRows).map { _ =>
      encoder.toRow(generator().asInstanceOf[Row])
    }.toArray

    val attrs = schema.toAttributes
    // NOTE(review): GenerateSafeProjection.test is not visible here; presumably the `old`
    // flag selects which code-generation variant is built - confirm against its definition.
    val p1 = GenerateSafeProjection.test(attrs, attrs, old = true)
    val p2 = GenerateSafeProjection.test(attrs, attrs, old = false)

    // One value per row per pass; the loops below therefore project each row exactly once.
    val benchmark = new Benchmark("safe projection", iters * numRows)

    benchmark.addCase("old version") { _ =>
      for (_ <- 1 to iters) {
        var i = 0
        var sum = 0.0
        while (i < rows.length) {
          // Project once, then read every column from that single result. Re-running the
          // projection for each field would do six projections per counted row.
          val projected = p1(rows(i))
          sum += projected.getByte(0)
          sum += projected.getShort(1)
          sum += projected.getInt(2)
          sum += projected.getLong(3)
          sum += projected.getFloat(4)
          sum += projected.getDouble(5)
          i += 1
        }
      }
    }

    benchmark.addCase("new version") { _ =>
      for (_ <- 1 to iters) {
        var i = 0
        var sum = 0.0
        while (i < rows.length) {
          // Same access pattern as the "old version" case so the two stay comparable.
          val projected = p2(rows(i))
          sum += projected.getByte(0)
          sum += projected.getShort(1)
          sum += projected.getInt(2)
          sum += projected.getLong(3)
          sum += projected.getFloat(4)
          sum += projected.getDouble(5)
          i += 1
        }
      }
    }

    benchmark.run()
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment