[flatMap()] Example usage of the flatMap function #pyspark #pyspark101
from pyspark.sql import SparkSession

if __name__ == "__main__":
    print("PySpark 101 Tutorial")
    print("Part 5 - How to use the flatMap RDD transformation in PySpark | PySpark 101 using the PyCharm IDE")

    # Using the | character inside appName() causes an error, e.g.:
    # appName("Part 5 - How to use flatMap RDD transformation in PySpark | PySpark 101")
    spark = SparkSession \
        .builder \
        .appName("Part 5 - How to use flatMap RDD transformation in PySpark") \
        .master("local[*]") \
        .enableHiveSupport() \
        .getOrCreate()

    py_number_str_list = ["1,2,3,4,5", "6,7,8,9,10", "11,12,13,14,15"]
    print("Printing Python Number List: ")
    print(py_number_str_list)

    print("Creating First RDD from Python Number List")
    # The second argument (3) sets the number of partitions for the RDD.
    number_str_rdd = spark.sparkContext.parallelize(py_number_str_list, 3)
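    # Illustrative check, not in the original gist: glom() groups elements by
    # partition, so with 3 elements and 3 partitions the layout should be
    # [['1,2,3,4,5'], ['6,7,8,9,10'], ['11,12,13,14,15']].
    # print(number_str_rdd.glom().collect())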
print("Printing Map Result") | |
number_str_flat_rdd = number_str_rdd.map(lambda n: n.split(",")) | |
print(number_str_flat_rdd.collect()) | |
print("Printing flatMap Result") | |
number_str_flat_rdd = number_str_rdd.flatMap(lambda n: n.split(",")) | |
print(number_str_flat_rdd.collect()) | |
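    # Expected shapes, for comparison: map keeps one output element per input
    # element, so its collect() returns a list of three lists:
    #   [['1','2','3','4','5'], ['6','7','8','9','10'], ['11','12','13','14','15']]
    # flatMap flattens those lists, so its collect() returns fifteen strings:
    #   ['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15']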
    input_file_path = "file:///g:/WS/data/pyspark101/tech_overview.txt"
    tech_overview_rdd = spark.sparkContext.textFile(input_file_path)
    print(tech_overview_rdd)

    print("Printing tech_overview_rdd: ")
    print(tech_overview_rdd.collect())

    # Split each line into words; flatMap flattens the per-line word lists into one RDD of words.
    tech_overview_words_rdd = tech_overview_rdd.flatMap(lambda ele: ele.split(" "))
    tech_overview_words_list = tech_overview_words_rdd.collect()
    print(tech_overview_words_list)

    for word in tech_overview_words_list:
        print(word)
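    # Note, not in the original gist: collect() pulls the entire RDD to the
    # driver, which is fine for a small demo file; for larger inputs, inspect a
    # sample instead, e.g.:
    # print(tech_overview_words_rdd.take(10))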
print("Stopping the SparkSession object") | |
spark.stop() |
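A natural follow-up to the words RDD above is the classic word count, which combines flatMap with map and reduceByKey. Below is a minimal, self-contained sketch, assuming only that PySpark is installed; the in-memory lines list and the app name are illustrative stand-ins, not part of the original gist.

from pyspark.sql import SparkSession

if __name__ == "__main__":
    spark = SparkSession.builder \
        .appName("flatMap word count sketch") \
        .master("local[*]") \
        .getOrCreate()

    # Hypothetical in-memory input, standing in for the tech_overview.txt file above.
    lines = ["spark is fast", "spark uses rdds", "flatMap flattens results"]
    lines_rdd = spark.sparkContext.parallelize(lines)

    # flatMap: one line in, many words out, flattened into a single RDD of words.
    words_rdd = lines_rdd.flatMap(lambda line: line.split(" "))

    # Pair each word with 1, then sum the counts per word.
    counts_rdd = words_rdd.map(lambda w: (w, 1)).reduceByKey(lambda a, b: a + b)
    print(counts_rdd.collect())  # e.g. [('spark', 2), ('is', 1), ...]

    spark.stop()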