Skip to content

Instantly share code, notes, and snippets.

@Ayush-Singhal28
Last active February 22, 2018 05:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ayush-Singhal28/26a31f7abff6c6f58c70c3abcc072adb to your computer and use it in GitHub Desktop.
Save Ayush-Singhal28/26a31f7abff6c6f58c70c3abcc072adb to your computer and use it in GitHub Desktop.
Spark-Assignment-1
// Solution-1
// Builds an RDD from a single in-memory string and reads its contents back.
// NOTE(review): `sc` is not defined anywhere in this transcript; presumably it is
// the SparkContext that spark-shell provides -- confirm against the session setup.
scala> val line = "Hello, world"
line: String = Hello, world
// `line` is wrapped in a one-element List, so the resulting RDD[String] holds
// exactly one element: the whole string.
scala> val rdd = sc.parallelize(List(line))
rdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[1] at parallelize at <console>:26
// collect() returns the RDD's contents as a local Array[String]. The comma in the
// printed result is part of the single string, not a separator between elements.
scala> rdd.collect()
res1: Array[String] = Array(Hello, world)
// Solution-2
// Zips two RDD[Int] values into one RDD of (Int, Int) pairs and collects the result.
// `.distinct` is called on the local List before it is handed to parallelize, so
// the duplicates are removed from the List itself and rdd1 is built from the
// already de-duplicated values.
scala> val rdd1 = sc.parallelize(List(1,1,2,3,4,4).distinct)
rdd1: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[28] at parallelize at <console>:24
scala> val rdd2 = sc.parallelize(List(1,2,3,4))
rdd2: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[29] at parallelize at <console>:24
// NOTE(review): Spark's RDD.zip documentation states that both RDDs must have the
// same number of partitions and the same number of elements in each partition.
// That presumably holds here because both inputs are four-element lists passed to
// parallelize without an explicit slice count -- confirm before changing either input.
scala> rdd1.zip(rdd2)
res11: org.apache.spark.rdd.RDD[(Int, Int)] = ZippedPartitionsRDD2[30] at zip at <console>:29
// `res11` is the name the REPL assigned to the zipped RDD above; collecting it
// yields the pairs as a local Array[(Int, Int)].
scala> res11.collect
res12: Array[(Int, Int)] = Array((1,1), (2,2), (3,3), (4,4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment