PartitionTest — gist by @BrianLondon, created February 22, 2016
import org.apache.spark._

object PartitionTest extends App {

  // Custom partitioner that routes integer keys to one of three partitions.
  class MyPartitioner() extends Partitioner {
    override def numPartitions = 3
    override def getPartition(x: Any) = x match { case n: Int => n % 3 }
    override def toString = "MyPartitioner"
  }

  val myPartitioner = new MyPartitioner()

  val conf = new SparkConf().setMaster("local[*]").setAppName("PartitionTest")
  val sc = new SparkContext(conf)

  val rdd = sc.parallelize(List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))

  // A freshly parallelized RDD has no partitioner.
  val p1 = rdd.partitioner
  println(s"orig: $p1") // None

  // groupBy with an explicit partitioner records it on the result.
  val p2 = rdd
    .groupBy((x: Int) => x % 3, myPartitioner)
    .partitioner
  println(s"groupBy: $p2") // Some(MyPartitioner)

  // map may change the keys, so Spark drops the partitioner.
  val p3 = rdd
    .groupBy((x: Int) => x % 3, myPartitioner)
    .map(t => t._1 -> t._2.toList.length)
    .partitioner
  println(s"map: $p3") // None

  // partitionBy reapplies the partitioner (at the cost of a shuffle).
  val p4 = rdd
    .groupBy((x: Int) => x % 3, myPartitioner)
    .map(t => t._1 -> t._2.toList.length)
    .partitionBy(myPartitioner)
    .partitioner
  println(s"reapplied: $p4") // Some(MyPartitioner)

  sc.stop()
}
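The map step loses the partitioner because a general map is free to change the keys. Not part of the original gist, but as a complementary sketch: PairRDDFunctions.mapValues only transforms values, so Spark can keep the existing partitioner without a re-shuffle. Assuming the same rdd and myPartitioner as above, these lines could be added inside the object (before sc.stop()):

  // Sketch (not in the original gist): mapValues cannot change keys,
  // so the result keeps the partitioner set by groupBy.
  val p5 = rdd
    .groupBy((x: Int) => x % 3, myPartitioner)
    .mapValues(_.toList.length)
    .partitioner
  println(s"mapValues: $p5") // expected: Some(MyPartitioner)

This is why mapValues is usually preferred over map on pair RDDs when only the values change: it avoids discarding co-partitioning that later joins or reduces could exploit.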