Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@rigid
Last active August 29, 2015 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rigid/10dd4dbbd546134a4268 to your computer and use it in GitHub Desktop.
Save rigid/10dd4dbbd546134a4268 to your computer and use it in GitHub Desktop.
Random hangs that can't be interrupted by Ctrl+C (not even in pdb)
#!/usr/bin/python
import csv
import Queue
import sys
import threading
import time
import traceback
# number of worker threads in the pool
max_threads = 30

# Fail with a usage hint instead of a bare IndexError when the two
# required command-line arguments are missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s <input.csv> <output.csv>" % sys.argv[0])

# input CSV
infile = str(sys.argv[1])
# output CSV
outfile = str(sys.argv[2])
# amount of rows in input CSV
row_count = 0
# amount of rows processed until now
rows = 0
###############################################################################
# Serializes writes to the csv writer that all worker threads share; the csv
# module does not document its writer objects as thread-safe.
_write_lock = threading.Lock()


def worker():
    """Thread worker: process queued work items until a ``None`` sentinel.

    Each work item is a dict with the keys ``'writer'`` (the shared csv
    writer) and ``'row'`` (one input row).  ``queue.task_done()`` is called
    for every item taken from the module-level ``queue`` -- including the
    sentinel and items whose processing raised -- so that a main thread
    waiting for the queue to drain is never left hanging by a failed row.
    A failing row is reported on stderr and the worker keeps running.
    """
    while True:
        worker_data = queue.get()
        try:
            # ``None`` is the shutdown sentinel
            if worker_data is None:
                return
            writer = worker_data['writer']
            row = worker_data['row']
            # process row
            # ...
            # build result
            result_row = ...
            # write immediately to not lose anything
            # NOTE(review): writerows() expects an iterable of rows; if
            # result_row is a single row this should be writerow() -- confirm.
            with _write_lock:
                writer.writerows(result_row)
        except Exception:
            # Report the failure but keep the thread alive; a dead worker
            # would leave its share of the queue unprocessed.
            traceback.print_exc()
        finally:
            # done (also reached on the sentinel return and on errors)
            queue.task_done()
###############################################################################
# initialize the queue shared between the main thread and the workers
queue = Queue.Queue()

# The Python 2 csv module expects its files to be opened in binary mode.
with open(infile, 'rb') as f_in, open(outfile, 'wb') as f_out:
    # CSV writer
    writer = csv.writer(f_out, delimiter='|', quotechar='\'')

    # CSV reader
    csv_fields = ['a', 'b', 'c', 'd', 'e']
    r = csv.DictReader(f_in, fieldnames=csv_fields,
                       delimiter='|', quotechar='\'')

    # get amount of input rows
    row_count = sum(1 for _ in r)

    # seek back to start and build a fresh reader instead of reusing the
    # exhausted one
    f_in.seek(0)
    r = csv.DictReader(f_in, fieldnames=csv_fields,
                       delimiter='|', quotechar='\'')

    # launch thread pool
    workers = []
    for _ in range(max_threads):
        t = threading.Thread(target=worker)
        t.daemon = True
        t.start()
        workers.append(t)

    # walk all rows from input; put row + writer in queue
    for row in r:
        queue.put({'writer': writer, 'row': row})

    # Queue.join() blocks in an untimed wait that Ctrl+C cannot interrupt on
    # Python 2, so poll the outstanding-task counter instead.  Sleeping here
    # (rather than once per queued row) keeps SIGINT responsive without
    # throttling the producer.  Also stop waiting if every worker has died,
    # since nothing could drain the queue any more.
    while queue.unfinished_tasks and any(t.is_alive() for t in workers):
        time.sleep(0.1)

    if queue.unfinished_tasks:
        sys.stderr.write("warning: %d queued rows were not processed\n"
                         % queue.unfinished_tasks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment