Skip to content

Instantly share code, notes, and snippets.

View cloud-fan's full-sized avatar

Wenchen Fan cloud-fan

  • Databricks
  • Hangzhou, China
View GitHub Profile
object NewUDFBenchmark extends SqlBasedBenchmark {
import spark.implicits._
/**
 * Adds the `id` column of `spark.range(card)` to itself with the column `+` operator
 * and writes the result through the "noop" format in append mode.
 *
 * @param card upper bound passed to `spark.range`
 */
private def nativeAdd(card: Long): Unit = {
  // Build the projection first, then configure and trigger the write separately.
  val summed = spark.range(card).select($"id" + $"id")
  val sink = summed.write.format("noop").mode("append")
  sink.save()
}
private def udfAdd(card: Long): Unit = {
val my_udf = udf { (input1: Long, input2: Long) => input1 + input2 }
spark.range(card).select(my_udf($"id", $"id")).write.format("noop").mode("append").save()
@cloud-fan
cloud-fan / tests.md
Last active September 20, 2016 02:55
Insertion in Spark SQL

normal table: src(a int, b int)

The results below are produced by running `SELECT a, b FROM src`.

SQL:

INSERT INTO TABLE src SELECT 1,2;

1.6:

  • hive table: success, the result is 1, 2
object ArrayHashBenchmark {
def main (args: Array[String]) {
val iters = 1024 * 40
val numArrays = 1024
val schema = ArrayType(IntegerType)
val generator = RandomDataGenerator.forType(schema, nullable = false).get
val encoder = ExpressionEncoder[Seq[Int]]
@cloud-fan
cloud-fan / zkw.scala
Last active August 29, 2015 14:12
cost flow
import scala.collection.mutable
class CostFlow(src: String, target: String) {
outer =>
class Edge(
val to: String,
val cost: Int,
var capacity: Int,
val next: Int,
@cloud-fan
cloud-fan / dijstra.scala
Last active August 29, 2015 14:11
scala dijkstra
import scala.collection.mutable
import scala.collection.immutable.HashMap
class Graph(data: Seq[(String, String, Double)]) {
/**
 * A vertex of the graph: a label together with the edges recorded as its connections.
 *
 * @param label      name of the vertex
 * @param connection edges associated with this vertex
 */
case class Vertex(
  label: String,
  connection: Seq[Edge]
)
/**
 * An edge described by the label of its target and a weight.
 *
 * @param targetLabel label handed to `adjacencyList` to resolve the target
 * @param weight      weight of this edge, exposed as a public `val`
 */
class Edge(targetLabel: String, val weight: Double) {
// Resolved on first access through `adjacencyList`, which is defined outside this block.
// NOTE(review): presumably lazy so that edges can be created before the adjacency
// list is fully built — confirm against the enclosing Graph.
lazy val target = adjacencyList(targetLabel)
}