Skip to content

Instantly share code, notes, and snippets.

@rxin
Created August 20, 2015 05:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rxin/57c56ba913c9dc011fb5 to your computer and use it in GitHub Desktop.
Save rxin/57c56ba913c9dc011fb5 to your computer and use it in GitHub Desktop.
code gen test
package org.apache.spark.sql
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.functions._
/**
 * Small driver that repeatedly runs a wide aggregation query and prints how
 * long each iteration took.
 *
 * Every iteration builds a one-million-row DataFrame with columns `a` (Int)
 * and `b` (String), selects 100 copies of `sum(a + i)` where `i` is the
 * iteration number, and collects the result.
 *
 * Usage: `CodegenTest <iterations>`
 */
object CodegenTest {

  /**
   * Entry point.
   *
   * @param args `args(0)` must be the number of iterations to run, as an integer.
   *             A value less than 1 runs no iterations.
   * @throws IllegalArgumentException if the iteration count is missing or is
   *                                  not a valid integer.
   */
  def main(args: Array[String]): Unit = {
    // Validate the argument before touching Spark so that a bad invocation
    // fails fast with a usable message instead of a bare index/format error.
    val n = args.headOption
      .flatMap(raw => scala.util.Try(raw.trim.toInt).toOption)
      .getOrElse {
        throw new IllegalArgumentException(
          "Usage: CodegenTest <iterations> (expected an integer, got: " +
            args.headOption.getOrElse("<nothing>") + ")")
      }

    val sc = SparkContext.getOrCreate()
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    for (i <- 1 to n) {
      // nanoTime is monotonic, so the measured interval cannot be skewed by
      // wall-clock adjustments the way currentTimeMillis can.
      val start = System.nanoTime()

      // The DataFrame is deliberately rebuilt inside the timed region, as in
      // the original measurement. The lambda parameter is named `x` so it does
      // not shadow the iteration counter `i` used below.
      val df = sc.parallelize(1 to 1000000).map(x => (x, x.toString)).toDF("a", "b")

      // use "+ i" to avoid reusing the same generated code.
      df.select(Seq.fill(100)(sum(df("a") + i)): _*).collect()

      // Report in milliseconds, matching the original output format.
      val timeTaken = (System.nanoTime() - start) / 1000000L
      println(s"iter $i time taken: $timeTaken")
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment