Last active
November 20, 2022 17:28
-
-
Save robert-lucente/2c324ae6fc9b780b51cb3267f8f93676 to your computer and use it in GitHub Desktop.
Parallelizing Nested For Loops in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Often nested for-loops cause performance issues. Remember that the complexity | |
rises as the power of the number of nested for loops. So, if you | |
have 3 nested for-loops, the complexity is N**3. The ideal case would be to | |
refactor the algorithm and associated code not to use nested for loops, | |
although this is not always possible. | |
A potential alternative might be to just throw hardware at the problem. The | |
use case that we are going to explore will permit this. | |
The use case that we are going to explore is a hierarchy of categories that | |
is three level deep. The first level of the hierarchy has an associated priority | |
and consequently, they all must be processed before the level 2 categories | |
can be processed. However, since level 2 categories have no associated | |
priority, they can all be executed in parallel. Similarly, once a category 2 | |
has been processed, all its associated level 3 categories can be processed | |
in parallel. | |
For a tabular format of the data used for the categories, go to the links below | |
Excel: https://docs.google.com/spreadsheets/d/1w9NJ9y_Pcb9xXITmG8PU66NovBZtajqt/ | |
PDF: https://drive.google.com/file/d/1HcYyObk4LMAczN5FyJu_H6l2rv75E29k/view?usp=sharing | |
For the associated code/data structures for the categories, refer to | |
common_processing.py. | |
The program is initially written using nested for loops. For the details, refer | |
to nested_for_loops.py. | |
The program is then re-written such that it is parallelized. Python's | |
multiprocessing pool is used. For the details, refer to | |
parallelize_nested_for_loops.py. | |
To demonstrate that the fundamental code for nested for loops vs parallelized | |
nested for loops is not that different, the common code was placed in | |
common_processing.py. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Common processing for "nested for loops" and "parallelized nested | |
for loops." | |
""" | |
def process_categories_lvl_1(): | |
""" | |
Since level 1 categories are prioritized, | |
they require special processing. Furthermore, | |
all level 1 category processing muust be | |
completed before any level 2 categories can | |
be processed. Consequently, they have a separate | |
function. | |
""" | |
print("Process level 1 categories", flush=True) | |
def process_categories_non_lvl_1( | |
category_lvl, category_previos_lvl, category_current_lvl | |
): | |
""" | |
All non-level 1 categories can be processed | |
using the same function. | |
The inputs are needed because in the actual application they | |
are used to dynamically build a query. | |
""" | |
if category_lvl == 2: | |
leading_blank_spaces = " " | |
else: | |
leading_blank_spaces = " " | |
print(flush=True) | |
print( | |
f" {leading_blank_spaces}**** Category level being processed: {category_lvl}", | |
flush=True, | |
) | |
print( | |
f" {leading_blank_spaces}**** Category from previous level: {category_previos_lvl}", | |
flush=True, | |
) | |
print( | |
f" {leading_blank_spaces}**** Category from current level: {category_current_lvl}", | |
flush=True, | |
) | |
print(flush=True) | |
def summary_counts(): | |
""" | |
Summary counts can be computed only after all | |
the categories and their associated hierarchies have been processed. | |
""" | |
print("Compute summary counts", flush=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Hierarchical categories of 3-level deep to be processed. | |
For a tabular format of the data used for the categories, | |
go to the links below | |
Excel: https://docs.google.com/spreadsheets/d/1w9NJ9y_Pcb9xXITmG8PU66NovBZtajqt/ | |
PDF: https://drive.google.com/file/d/1HcYyObk4LMAczN5FyJu_H6l2rv75E29k/view?usp=sharing | |
Stand in for the category hiearchy data that is most likely stored in a database. | |
""" | |
from typing import Dict, List, Set | |
def load_categories_lvl_1() -> List[str]: | |
""" | |
Since level 1 categories are prioritized, | |
a list data structure is used. | |
""" | |
categories_lvl_1 = [ | |
"category_lvl_1_a", | |
"category_lvl_1_b", | |
"category_lvl_1_c", | |
"category_lvl_1_d", | |
"category_lvl_1_e", | |
"category_lvl_1_f", | |
"category_lvl_1_g", | |
] | |
return categories_lvl_1 | |
def load_categories_lvl_1_2() -> Dict[str, Set[str]]: | |
""" " | |
Since level 2 categories are not prioritized, | |
the data structure is a dictionary keyed by | |
the level 1 category followed by a set of | |
level 2 categories. | |
Each level 2 category can be associated with only | |
one level 1 category. | |
""" | |
categories_lvl_1_2 = { | |
"category_lvl_1_b": {"category_lvl_2_a"}, | |
"category_lvl_1_c": {"category_lvl_2_b"}, | |
"category_lvl_1_d": {"category_lvl_2_c", "category_lvl_2_d"}, | |
"category_lvl_1_e": { | |
"category_lvl_2_e", | |
"category_lvl_2_f", | |
"category_lvl_2_g", | |
}, | |
"category_lvl_1_f": {"category_lvl_2_h"}, | |
"category_lvl_1_g": {"category_lvl_2_i"}, | |
} | |
return categories_lvl_1_2 | |
def load_categories_lvl_2_3() -> Dict[str, Set[str]]: | |
""" | |
Since level 3 categories are not prioritized, | |
the data structure is a dictionary keyed by | |
the level 2 category followed by a set of | |
level 3 categories. | |
""" | |
categories_lvl_2_3 = { | |
"category_lvl_2_b": {"category_lvl_3_a"}, | |
"category_lvl_2_h": {"category_lvl_3_b", "category_lvl_3_c"}, | |
"category_lvl_2_i": { | |
"category_lvl_3_d", | |
"category_lvl_3_e", | |
"category_lvl_3_f", | |
}, | |
} | |
return categories_lvl_2_3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Hierarchical categories of 3-level deep are processed. | |
Since the depth of the hierarchy is shallow and fixed, no recursion | |
is necessary. Consequently, will use simple nested for loops. | |
""" | |
import common_processing | |
import input_data | |
def main(): | |
""" | |
1) Load category hierarchy | |
2) Process level 1 category hierarchy | |
3) Loop through the category hierarchy | |
4) Compute summary counts | |
""" | |
# 1) Load category hierarchy | |
categories_lvl_1 = input_data.load_categories_lvl_1() | |
categories_lvl_1_2 = input_data.load_categories_lvl_1_2() | |
categories_lvl_2_3 = input_data.load_categories_lvl_2_3() | |
# 2) Process level 1 category hierarchy | |
common_processing.process_categories_lvl_1() | |
# 3) Loop through the category hierarchy | |
for category_lvl_1 in categories_lvl_1: | |
print() | |
print(f"Category Level 1: {category_lvl_1}") | |
try: | |
categories_lvl_2 = categories_lvl_1_2[category_lvl_1] | |
for category_lvl_2 in categories_lvl_2: | |
print() | |
print(f" Category Level 2: {category_lvl_2}") | |
common_processing.process_categories_non_lvl_1( | |
2, category_lvl_1, category_lvl_2 | |
) | |
try: | |
categories_lvl_3 = categories_lvl_2_3[category_lvl_2] | |
for category_lvl_3 in categories_lvl_3: | |
print(f" Category Level 3: {category_lvl_3}") | |
common_processing.process_categories_non_lvl_1( | |
3, category_lvl_2, category_lvl_3 | |
) | |
except KeyError: | |
print(" There are no associated level 3 categories") | |
print() | |
except KeyError: | |
print() | |
print(" There are no associated level 2 categories") | |
# 4) Compute summary counts | |
common_processing.summary_counts() | |
if __name__ == "__main__": | |
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Process level 1 categories | |
Category Level 1: category_lvl_1_a | |
There are no associated level 2 categories | |
Category Level 1: category_lvl_1_b | |
Category Level 2: category_lvl_2_a | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_b | |
**** Category from current level: category_lvl_2_a | |
There are no associated level 3 categories | |
Category Level 1: category_lvl_1_c | |
Category Level 2: category_lvl_2_b | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_c | |
**** Category from current level: category_lvl_2_b | |
Category Level 3: category_lvl_3_a | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_b | |
**** Category from current level: category_lvl_3_a | |
Category Level 1: category_lvl_1_d | |
Category Level 2: category_lvl_2_d | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_d | |
**** Category from current level: category_lvl_2_d | |
There are no associated level 3 categories | |
Category Level 2: category_lvl_2_c | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_d | |
**** Category from current level: category_lvl_2_c | |
There are no associated level 3 categories | |
Category Level 1: category_lvl_1_e | |
Category Level 2: category_lvl_2_g | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_e | |
**** Category from current level: category_lvl_2_g | |
There are no associated level 3 categories | |
Category Level 2: category_lvl_2_e | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_e | |
**** Category from current level: category_lvl_2_e | |
There are no associated level 3 categories | |
Category Level 2: category_lvl_2_f | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_e | |
**** Category from current level: category_lvl_2_f | |
There are no associated level 3 categories | |
Category Level 1: category_lvl_1_f | |
Category Level 2: category_lvl_2_h | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_f | |
**** Category from current level: category_lvl_2_h | |
Category Level 3: category_lvl_3_b | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_h | |
**** Category from current level: category_lvl_3_b | |
Category Level 3: category_lvl_3_c | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_h | |
**** Category from current level: category_lvl_3_c | |
Category Level 1: category_lvl_1_g | |
Category Level 2: category_lvl_2_i | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_g | |
**** Category from current level: category_lvl_2_i | |
Category Level 3: category_lvl_3_d | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_i | |
**** Category from current level: category_lvl_3_d | |
Category Level 3: category_lvl_3_e | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_i | |
**** Category from current level: category_lvl_3_e | |
Category Level 3: category_lvl_3_f | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_i | |
**** Category from current level: category_lvl_3_f | |
Compute summary counts |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Parallelize level 2 category processing | |
""" | |
from multiprocessing.pool import Pool | |
import input_data | |
import common_processing | |
def get_level_2_categories(): | |
""" | |
The way that the set of level 2 categories is obtained | |
wil vary between applications. In my particular application, | |
the data will be obtained from a database. | |
""" | |
# Load category level 1, 2 combinations | |
categories_lvl_1_2 = input_data.load_categories_lvl_1_2() | |
# Create an empty set to store level 2 categories | |
categories_lvl_2 = set() | |
# Loop through to get the sets containing level 2 categories | |
for category_lvl_2_set in categories_lvl_1_2.values(): | |
# Loop a level 2 category set to obtain | |
# an individual level 2 category | |
for category_lvl_2 in category_lvl_2_set: | |
categories_lvl_2.add(category_lvl_2) | |
return categories_lvl_2 | |
def get_category_lvl_1_given_lvl_2(category_lvl_2): | |
""" | |
In a production system, this function will probably | |
obtain the data from a database. If the execution | |
of this function is time critical, some sort of hash | |
lookup will have to be used. | |
Pylint will complain "Either all return statements in a | |
function should return an expression, or none of them should. | |
(inconsistent-return-statements)". This isn't true because | |
the for loop will always find a key since a level 2 category | |
can't exist without a leve 1 category. | |
""" | |
categories_lvl_1_2 = input_data.load_categories_lvl_1_2() | |
for key, value_set in categories_lvl_1_2.items(): | |
if category_lvl_2 in value_set: | |
return key | |
def process_category_lvl_2(category_lvl_2): | |
""" | |
1) Obtain the category level 1 associated with the | |
input level 2 category | |
2) Process the level 1, 2 category combination | |
3) Process the level 2, 3 combinations for the | |
input level 2 category | |
""" | |
# 1) Obtain the category level 1 associated with the | |
# input level 2 category | |
category_lvl_1 = get_category_lvl_1_given_lvl_2(category_lvl_2) | |
# 2) Process the level 1, 2 combination | |
common_processing.process_categories_non_lvl_1(2, category_lvl_1, category_lvl_2) | |
# 3) Process the level 2, 3 combinations for the | |
# input level 2 category | |
categories_lvl_2_3 = input_data.load_categories_lvl_2_3() | |
try: | |
categories_lvl_3 = categories_lvl_2_3[category_lvl_2] | |
for category_lvl_3 in categories_lvl_3: | |
print(f" Category Level 3: {category_lvl_3}") | |
common_processing.process_categories_non_lvl_1( | |
3, category_lvl_2, category_lvl_3 | |
) | |
except KeyError: | |
print(" There are no associated level 3 categories") | |
print() | |
def main(): | |
""" | |
1) Process level 1 category hierarchy | |
2) Execute in parallel all level 2 categories | |
Each level 2 category in turn will process its | |
associated level 3 categories | |
3) Compute summary counts | |
""" | |
# 1) Process level 1 category hierarchy | |
common_processing.process_categories_lvl_1() | |
print(flush=True) | |
# 2) Execute in parallel all level 2 categories | |
with Pool() as pool: | |
pool.imap_unordered(process_category_lvl_2, get_level_2_categories()) | |
pool.close() | |
pool.join() | |
# 3) Compute summary counts | |
common_processing.summary_counts() | |
if __name__ == "__main__": | |
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Process level 1 categories | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_e | |
**** Category from current level: category_lvl_2_f | |
There are no associated level 3 categories | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_f | |
**** Category from current level: category_lvl_2_h | |
Category Level 3: category_lvl_3_b | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_h | |
**** Category from current level: category_lvl_3_b | |
Category Level 3: category_lvl_3_c | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_h | |
**** Category from current level: category_lvl_3_c | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_e | |
**** Category from current level: category_lvl_2_g | |
There are no associated level 3 categories | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_c | |
**** Category from current level: category_lvl_2_b | |
Category Level 3: category_lvl_3_a | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_b | |
**** Category from current level: category_lvl_3_a | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_d | |
**** Category from current level: category_lvl_2_c | |
There are no associated level 3 categories | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_b | |
**** Category from current level: category_lvl_2_a | |
There are no associated level 3 categories | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_d | |
**** Category from current level: category_lvl_2_d | |
There are no associated level 3 categories | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_g | |
**** Category from current level: category_lvl_2_i | |
Category Level 3: category_lvl_3_d | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_i | |
**** Category from current level: category_lvl_3_d | |
Category Level 3: category_lvl_3_f | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_i | |
**** Category from current level: category_lvl_3_f | |
Category Level 3: category_lvl_3_e | |
**** Category level being processed: 3 | |
**** Category from previous level: category_lvl_2_i | |
**** Category from current level: category_lvl_3_e | |
**** Category level being processed: 2 | |
**** Category from previous level: category_lvl_1_e | |
**** Category from current level: category_lvl_2_e | |
There are no associated level 3 categories | |
Compute summary counts |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Initial exploration at parallelizing nested for-loops. It was | |
abonded when it was realized that the approach used in the code | |
will not work. This file produces incorrect results and should be | |
deleted. It will be deleted on 2/1/2023. Keeping it around unti | |
then because some people are still referring to it. | |
Hierarchical categories 3 levels deep need to be processed. | |
Since the depth of the hierarchy is shallow and fixed, no recursion | |
is necessary. Consequently, will use simple nested for loops. | |
The first level of the hierarchy has an associated priority. However, | |
the other levels do not. Consequently, once the level 1 categories have | |
been processed, all the level 2 categories can be processed in parallel. | |
Similarly, once a particular level 2 category has been processed, | |
all its associated category 3 levels can be processed in parallel. | |
TODO (Robert): Add processing that once a particular level 2 category has been | |
processed, all its associated category 3 levels can be processed in parallel. | |
""" | |
from multiprocessing.pool import Pool | |
import common_processing | |
import input_data | |
def process_categories_non_lvl_1(categories_lvl_x_y): | |
""" | |
The processing for "nested for loops" and "parallelized nested | |
for loops" is identical except that in the parallel processing | |
the iterable needs to be unpacked. | |
""" | |
common_processing.process_categories_non_lvl_1( | |
categories_lvl_x_y[0], categories_lvl_x_y[1], categories_lvl_x_y[2] | |
) | |
def main(): | |
""" | |
1) Load category hierarchy | |
2) Process level 1 category hierarchy | |
3) Create an empty set to hold a tuple containing | |
the current level, the category from the previous | |
level and the category from the current level | |
4) Loop through the category hierarchy | |
5) Process categories in level 2 in parallel | |
6) Compute summary counts | |
""" | |
# 1) Load category hierarchy | |
categories_lvl_1 = input_data.load_categories_lvl_1() | |
categories_lvl_1_2 = input_data.load_categories_lvl_1_2() | |
# 2) Process level 1 category hierarchy | |
common_processing.process_categories_lvl_1() | |
# 3) Create an empty set to hold a tuple containing | |
# the current level, the category from the previous | |
# level and the category from the current level | |
cat_lvl_1_2_combos = set() | |
# 4) Loop through the category hierarchy | |
for category_lvl_1 in categories_lvl_1: | |
try: | |
categories_lvl_2 = categories_lvl_1_2[category_lvl_1] | |
for category_lvl_2 in categories_lvl_2: | |
cat_lvl_1_2_combos.add((2, category_lvl_1, category_lvl_2)) | |
except KeyError: | |
# Some level 1 categories do not have an associated | |
# level 2 category. This will result in a KeyError | |
# during the dictionary lookup. In this case the | |
# KeyError can be ignored. | |
pass | |
# 5) Process categories in level 2 in parallel | |
with Pool() as pool: | |
pool.imap_unordered(process_categories_non_lvl_1, cat_lvl_1_2_combos) | |
pool.close() | |
pool.join() | |
# 6) Compute summary counts | |
common_processing.summary_counts() | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment