Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save dharma6872/12f485fb91fb2991fbfd2267cef0d0ac to your computer and use it in GitHub Desktop.
Save dharma6872/12f485fb91fb2991fbfd2267cef0d0ac to your computer and use it in GitHub Desktop.
[flatMap()]flatMap 함수 사용법 예시 #pyspark #pyspark101
from pyspark.sql import SparkSession
if __name__ == "__main__":
    # Tutorial script: contrasts RDD.map() with RDD.flatMap(), first on a small
    # in-memory list of comma-separated number strings and then on the lines
    # of a local text file. Everything is printed to stdout.
    print("Pyspark 101 Tutorial")
    print("Parkt 5 - How to use flatMap RDD transformation in PySpark | PySpark 101 using Pycharm IDE")

    # Original author's note: using the "|" character inside appName() caused
    # an error, so the application name below omits the "| PySpark 101" suffix:
    #   appName("Parkt 5 - How to use flatMap RDD transformation in PySpark | PySpark 101")
    spark = (
        SparkSession.builder
        .appName("Parkt 5 - How to use flatMap RDD transformation in PySpark")
        .master("local[*]")
        .enableHiveSupport()
        .getOrCreate()
    )

    # Everything after session creation runs inside try/finally so the session
    # is stopped even when a step fails (e.g. the input file does not exist).
    try:
        py_number_str_list = ["1,2,3,4,5", "6,7,8,9,10", "11,12,13,14,15"]
        print("Printing Python Number List: ")
        print(py_number_str_list)

        print("Creating First RDD from Python Number List")
        # Second argument requests 3 partitions for the parallelized list.
        number_str_rdd = spark.sparkContext.parallelize(py_number_str_list, 3)

        # map(): each input string becomes one list of tokens, so collect()
        # returns a list of lists.
        print("Printing Map Result")
        number_str_mapped_rdd = number_str_rdd.map(lambda n: n.split(","))
        print(number_str_mapped_rdd.collect())

        # flatMap(): the per-string token lists are flattened, so collect()
        # returns a single flat list of tokens.
        print("Printing flatMap Result")
        number_str_flat_rdd = number_str_rdd.flatMap(lambda n: n.split(","))
        print(number_str_flat_rdd.collect())

        # NOTE(review): hard-coded path on the author's machine; adjust it to
        # a file that exists locally before running.
        input_file_path = "file:///g:/WS/data/pyspark101/tech_overview.txt"
        tech_overview_rdd = spark.sparkContext.textFile(input_file_path)
        # Prints the RDD object itself (not its contents).
        print(tech_overview_rdd)

        print("Printing tech_overview_rdd: ")
        print(tech_overview_rdd.collect())

        # Split every line on single spaces and flatten into one RDD of words.
        tech_overview_words_rdd = tech_overview_rdd.flatMap(lambda ele: ele.split(" "))
        tech_overview_words_list = tech_overview_words_rdd.collect()
        print(tech_overview_words_list)

        for word in tech_overview_words_list:
            print(word)
    finally:
        print("Stopping the SparkSession object")
        spark.stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment