Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Spark-Assignment-1
// Solution-1
// Goal: turn a single in-memory string into an RDD and read it back on the driver.
// `sc` is not defined in this transcript; presumably it is the SparkContext that
// spark-shell predefines -- confirm if running outside the shell.
scala> val line = "Hello, world"
line: String = Hello, world
// Wrap the string in a one-element List so the resulting RDD holds exactly one record.
scala> val rdd = sc.parallelize(List(line))
rdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[1] at parallelize at <console>:26
// collect() brings the RDD's records back as a local Array[String].
// The printed Array(Hello, world) is ONE element (the string contains a comma), not two.
scala> rdd.collect()
res1: Array[String] = Array(Hello, world)
// Solution-2
// Goal: pair two integer RDDs element by element using zip.
// Note that `.distinct` is called on the local List before parallelize, so the
// de-duplication happens in plain Scala and rdd1 is created from List(1, 2, 3, 4).
scala> val rdd1 = sc.parallelize(List(1,1,2,3,4,4).distinct)
rdd1: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[28] at parallelize at <console>:24
scala> val rdd2 = sc.parallelize(List(1,2,3,4))
rdd2: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[29] at parallelize at <console>:24
// zip pairs the i-th record of rdd1 with the i-th record of rdd2.
// NOTE(review): per the Spark API docs, RDD.zip requires both RDDs to have the same
// number of partitions and the same number of elements per partition. That appears to
// hold here because both sides are 4-element lists parallelized the same way; it will
// fail at runtime for RDDs of differing sizes or partitioning.
scala> rdd1.zip(rdd2)
res11: org.apache.spark.rdd.RDD[(Int, Int)] = ZippedPartitionsRDD2[30] at zip at <console>:29
// res11 is the REPL's auto-generated name for the zipped RDD above; collect returns
// its (Int, Int) pairs as a local Array.
scala> res11.collect
res12: Array[(Int, Int)] = Array((1,1), (2,2), (3,3), (4,4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment