Skip to content

Instantly share code, notes, and snippets.

@wil3
Created December 5, 2017 14:02
Show Gist options
  • Save wil3/75483d2594df2f85439ae6920ef18a78 to your computer and use it in GitHub Desktop.
Save wil3/75483d2594df2f85439ae6920ef18a78 to your computer and use it in GitHub Desktop.
"""
Purpose:
This Python script is meant to introduce you to parallel processing with
threads and processes. The example uses a simple worker that randomly selects
a letter and continually hashes this value. This demonstrates two methods for
creating a thread: (1) by specifying a target function or (2) by inheriting
from threading.Thread.
This lesson also shows that the Thread and Process APIs are (for the most part)
interchangeable, allowing us to simply replace threading.Thread with multiprocessing.Process
to switch from threads to processes.
"""
__author__ = "William Koch"
__email__ = "wfkoch@bu.edu"
# STEP 1) Import libraries we will be using
# This library is for multi-threading
import threading
# While this is for multi processing, the API is almost identical
import multiprocessing
# Let's log stuff
import logging
# The remaining libraries will be used to create a simple worker
# This library is for creating hashes, https://docs.python.org/2/library/hashlib.html
import hashlib
# Get times
import time
# Random sampling
import random
# Helper for accessing letters
import string
# STEP 2) Init our logger
# More info: https://docs.python.org/2/howto/logging.html#logging-basic-tutorial
# Configure logging at DEBUG level so the INFO messages logged by the main
# thread and by the workers below are displayed.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger used by the main block and by every worker in this file.
logger = logging.getLogger("disscussion-wk8")
# There are two ways to init worker (1) by function (2) inherit threading.Thread
"""
Method 2 for creating thread inheriting threading.Thread,
to use multiple processors instead just replace threading.Thread with
multiprocessing.Process
"""
class HashPerformance(threading.Thread):
    """Worker thread that repeatedly hashes a message and logs its throughput.

    This is the "inherit from threading.Thread" way of defining a worker. The
    work parameters are passed through the constructor because run() takes no
    arguments.
    """

    def __init__(self, hashfn, count):
        """Store the parameters that run() will use.

        Args:
            hashfn: hashlib-style constructor (e.g. hashlib.sha256) that takes
                bytes and returns an object providing hexdigest().
            count: number of hash iterations to perform.
        """
        # First call the parent's constructor so the thread machinery is set
        # up before we attach our own state.
        super(HashPerformance, self).__init__()
        self.hashfn = hashfn
        self.count = count
        # Just choose a random letter to hash so output isn't deterministic.
        self.message = random.choice(string.ascii_uppercase)

    def run(self):
        """Hash the message self.count times and log the hashes per second.

        Refer to the threading.Thread API: the start() method calls this
        method internally and it is what we override. We must match the
        method signature, which does not allow any parameters, therefore the
        parameters are passed through the constructor.

        After the loop, self.message holds the final hex digest.
        """
        # Use the hash function given to the constructor, not a module global.
        hashfn = self.hashfn
        message = self.message
        start_time = time.time()
        for _ in range(self.count):
            # hexdigest() returns text while the hash function needs bytes,
            # so encode on every iteration.
            message = hashfn(message.encode("utf-8")).hexdigest()
        lapse_time = time.time() - start_time
        self.message = message
        # Compute throughput; the timer may report zero elapsed time for very
        # short runs, so avoid dividing by zero.
        if lapse_time > 0:
            tput = self.count / lapse_time
        else:
            tput = float("inf") if self.count else 0.0
        logger.info(" {} Tput = {} hashes/second".format(threading.current_thread().name, tput))
# STEP 3) Create a worker
"""
An example of a worker function, see below for thread initialization
"""
def compute_hash_tput(count, hashfn):
    """Do some work: repeatedly hash a message and log the throughput.

    Args:
        count: number of hash iterations to perform.
        hashfn: hashlib-style constructor (e.g. hashlib.sha256) that takes
            bytes and returns an object providing hexdigest().
    """
    # Just choose a random letter to hash so output isn't deterministic
    message = random.choice(string.ascii_uppercase)
    start_time = time.time()
    for _ in range(count):
        # We had some issues on certain computers with encoding; hashlib
        # requires bytes, so encode as utf-8 before every hash because
        # hexdigest() hands back text.
        message = hashfn(message.encode("utf-8")).hexdigest()
    lapse_time = time.time() - start_time
    # Compute throughput; the timer may report zero elapsed time for very
    # short runs, so avoid dividing by zero.
    if lapse_time > 0:
        tput = count / lapse_time
    else:
        tput = float("inf") if count else 0.0
    logger.info(" {} Tput = {} hashes/second".format(threading.current_thread().name, tput))
# STEP 4) Create main and initialize variables
if __name__ == "__main__":
    # Define the hash we will use, refer to hashlib API doc for options
    hashfn = hashlib.sha256
    # Number of hashes each worker computes
    num_hashes = 100000
    # Number of workers to run concurrently
    num_workers = 10
    logger.info("Main thread name = {}".format(threading.current_thread().name))

    # STEP 5) Demonstrate the function is working
    #compute_hash_tput(100, hashlib.sha256)

    # Demonstrate multiple threads doing work
    # Create a bunch of threads
    workers = []
    for _ in range(num_workers):
        # Method 1 for defining a thread: specify the worker as the target.
        # Note args is passed in as a tuple! Use the configured hashfn so
        # changing it above actually changes what the workers compute.
        t = threading.Thread(target=compute_hash_tput, args=(num_hashes, hashfn))
        # start the execution of the thread
        t.start()
        # save the thread so we can join later
        workers.append(t)
    # Wait for each worker to finish and join back with the main thread
    for w in workers:
        w.join()

    # Uncomment to demonstrate Method 2 (inheriting threading.Thread)
    # Now show by creating classes
    # class_workers = []
    # for _ in range(num_workers):
    #     t = HashPerformance(hashfn, num_hashes)
    #     t.start()
    #     class_workers.append(t)
    # for w in class_workers:
    #     w.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment