Created
January 13, 2021 05:51
-
-
Save dharma6872/889daacc769ba411e2ea0e6dd6c1ea16 to your computer and use it in GitHub Desktop.
[Create First RDD] Creating an RDD #pyspark #pyspark101
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""PySpark 101, Part 2: create first RDDs from Python lists.

Builds a local SparkSession, parallelizes a number list and a string
list into RDDs, collects and prints the string RDD, then stops Spark.
"""

# Importing Spark Related Packages
from pyspark.sql import SparkSession

if __name__ == "__main__":
    print("PySpark 101 Tutorial")
    print("Part 2. Create First RDD(Resilient Distributed Dataset) in PySpark using PyCharm IDE")

    # Build (or reuse) a SparkSession that runs locally on all available
    # cores. NOTE(review): enableHiveSupport() requires a Hive-capable
    # Spark build/config — confirm it is actually needed for this demo.
    spark = SparkSession \
        .builder \
        .appName("Part 2. Create First RDD(Resilient Distributed Dataset) in PySpark using PyCharm IDE") \
        .master("local[*]") \
        .enableHiveSupport() \
        .getOrCreate()

    py_number_list = [1, 2, 3, 4, 5]
    print("Printing Python Number List: ")
    print(py_number_list)
    print(type(py_number_list))

    print("Creating First RDD from Python Number List")
    # The second argument (3) is the number of partitions.
    number_rdd = spark.sparkContext.parallelize(py_number_list, 3)
    print(type(number_rdd))

    py_str_list = ["Arun", "Arvind", "Arjun", "Anna"]
    print(py_str_list)
    # Split the string list across 2 partitions.
    str_rdd = spark.sparkContext.parallelize(py_str_list, 2)
    print(type(str_rdd))

    # collect() pulls every element of the RDD back to the driver.
    str_rdd_output = str_rdd.collect()
    print("Printing Output str_rdd: ")
    print(str_rdd_output)

    print("Stopping the SparkSession object")
    spark.stop()
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.