# Build an RDD from a text file, lower-case it, and print its lineage.
# NOTE(review): `sc` is not defined in this snippet; it is assumed to be an
# already-created SparkContext (e.g. the one the pyspark shell provides).

# Create an RDD of the text file, asking for a minimum of 4 partitions.
my_text_file = sc.textFile('tokens_spark.txt', minPartitions=4)

# Print the RDD object itself (not the contents of the file).
print(my_text_file)

# Convert every line to lower case; the name is rebound to the mapped RDD.
my_text_file = my_text_file.map(lambda x: x.lower())

# Print the updated RDD object.
print(my_text_file)

# Get the RDD lineage. PySpark hands the debug string back as UTF-8 bytes,
# so decode it first; otherwise print() shows a b'...' repr with escaped
# newlines instead of the multi-line lineage.
lineage = my_text_file.toDebugString()
if isinstance(lineage, bytes):
    lineage = lineage.decode('utf-8')
print(lineage)
# (GitHub page footer captured with the snippet; not part of the script.)
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment