import time
from random import randint

# Demo: build a large list of random integers, load it into Spark as an RDD
# with a single partition, filter it, and time how long counting takes.
#
# NOTE(review): `sc` is not defined in this snippet; presumably it is the
# SparkContext provided by the PySpark shell / notebook session -- confirm.

LIST_SIZE = 20000000  # how many random integers to generate
LOWER_BOUND = 10      # smallest value randint may return (inclusive)
UPPER_BOUND = 1000    # largest value randint may return (inclusive)
THRESHOLD = 200       # the filter keeps values >= this

# create a list of random integers between 10 and 1000 (both ends inclusive)
my_large_list = [randint(LOWER_BOUND, UPPER_BOUND) for _ in range(LIST_SIZE)]

# create one partition of the list
my_large_list_one_partition = sc.parallelize(my_large_list, numSlices=1)

# check number of partitions
print(my_large_list_one_partition.getNumPartitions())
# >> 1

# filter numbers greater than or equal to 200
my_large_list_one_partition = my_large_list_one_partition.filter(
    lambda x: x >= THRESHOLD
)

# Time the count of the filtered elements.
# The original snippet used the `%%time` cell magic at this point. That magic
# only works as the first line of a Jupyter cell and is a syntax error in
# plain Python, so the timing is done explicitly here. In a notebook you can
# instead put `%%time` on the first line of a cell containing just the
# count/print statement.
start = time.perf_counter()
filtered_count = my_large_list_one_partition.count()
elapsed = time.perf_counter() - start

# count the number of elements in filtered list
print(filtered_count)
# >> 16162207  (example from one run; the exact value varies because the
#               input list is random)
print("Wall time: {:.2f} s".format(elapsed))
# (GitHub page footer, not part of the code: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")