Last active
January 13, 2021 08:50
-
-
Save dharma6872/0c393ef53529da8a0b07dc772c80170a to your computer and use it in GitHub Desktop.
[filter RDD transformation] filter RDD 변환 예시 #pyspark #pyspark101
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Importing Spark-related packages
from pyspark.sql import SparkSession

if __name__ == "__main__":
    print("PySpark 101 Tutorial")
    print("Part 4. How to use filter RDD transformation in PySpark using PyCharm IDE")

    # Build (or reuse) a SparkSession running locally on all available cores.
    spark = SparkSession \
        .builder \
        .appName("Part 4. How to use filter RDD transformation in PySpark using PyCharm IDE") \
        .master("local[*]") \
        .enableHiveSupport() \
        .getOrCreate()

    py_number_list = [1, 2, 3, 4, 5]
    print("Printing Python Number List: ")
    print(py_number_list)

    print("Creating First RDD from Python Number List")
    # NOTE(review): the original (Korean) comment said "the number 3 means the
    # number of partitions", but no partition count is passed here, so Spark
    # uses its default parallelism — confirm intent.
    number_rdd = spark.sparkContext.parallelize(py_number_list)
    # Keep only the even numbers.
    number_even_rdd = number_rdd.filter(lambda n: n % 2 == 0)
    print(number_even_rdd.collect())

    py_str_list = ["Arun", "Arvind", "Arjun", "Anna"]
    print(py_str_list)
    # The second argument (2) is the number of partitions for this RDD.
    str_rdd = spark.sparkContext.parallelize(py_str_list, 2)
    # Keep only the names containing the letter "r".
    str_rdd_result = str_rdd.filter(lambda name: "r" in name).collect()
    print(str_rdd_result)

    # Read a local text file as an RDD of lines.
    input_file_path = "file:///g:/WS/data/pyspark101/tech.txt"
    tech_rdd = spark.sparkContext.textFile(input_file_path)
    # Keep only the lines containing "park" (matches e.g. "Spark", "PySpark").
    tech_lower_rdd = tech_rdd.filter(lambda ele: "park" in ele)
    for element in tech_lower_rdd.collect():
        print(element)

    print("Stopping the SparkSession object")
    spark.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment