Create a gist now

Instantly share code, notes, and snippets.

@tovbinm /udfs.scala
Last active Jun 29, 2017

Embed
What would you like to do?
Codegen dies (Spark 2.0.2 and 2.1.1) - no udf nesting
import spark.implicits._
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.execution.debug._
// Repro script for the reported codegen failure (Spark 2.0.2 and 2.1.1): applies an
// identity UDF 19 times in sequence, without nesting, each application reading the
// column appended by the previous step.

// Identity UDF over Int.
val u = udf((a: Int) => a)

// Single-row starting frame with one column named "0".
val df = spark.sparkContext.parallelize(Seq(0)).toDF("0")

// For every step in 1..19: keep all existing columns and append one named after the
// step, computed by applying `u` to the most recently appended column.
val res = (1 until 20).foldLeft(df) { (acc, step) =>
  val lastColumn = acc.columns.toSeq.takeRight(1).map(name => col(name))
  val appended = u(lastColumn: _*).as(step.toString)
  acc.select(col("*"), appended)
}

// Debug helpers (presumably supplied by the execution.debug import), then display the
// result with show's argument set to false.
res.debugCodegen()
res.debug()
res.show(false)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment