Skip to content

Instantly share code, notes, and snippets.

@NeelamLakra
Created February 22, 2018 05:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NeelamLakra/7b7adb2825cb49e914ae8e78142611b6 to your computer and use it in GitHub Desktop.
Save NeelamLakra/7b7adb2825cb49e914ae8e78142611b6 to your computer and use it in GitHub Desktop.
Spark-Assignment1
scala> val line = "hello, world"
line: String = hello, world
scala> val list= List(line)
list: List[String] = List(hello, world)
scala> val rdd1=sc.parallelize(list)
rdd1: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[1] at parallelize at <console>:28
scala> rdd1.collect
res1: Array[String] = Array(hello, world)
scala> val x = sc.parallelize(List(1,1,2,3,4,4).distinct)
x: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[0] at parallelize at <console>:24
scala> val y = sc.parallelize(List(1,2,3,4))
y: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[1] at parallelize at <console>:24
scala> x.zip(y)
res0: org.apache.spark.rdd.RDD[(Int, Int)] = ZippedPartitionsRDD2[2] at zip at <console>:29
scala> res0.collect
res1: Array[(Int, Int)] = Array((1,1), (2,2), (3,3), (4,4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment