Last active
August 29, 2015 14:20
-
-
Save rigid/10dd4dbbd546134a4268 to your computer and use it in GitHub Desktop.
Random hangs that can't be interrupted by Ctrl+C (not even in pdb)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import csv
import Queue
import sys
import threading
import time
import traceback
# number of worker threads started by the main script
max_threads = 30

# Both file paths are required; exit with a usage message instead of an
# IndexError traceback when one of them is missing.
if len(sys.argv) < 3:
    sys.exit('usage: %s INPUT_CSV OUTPUT_CSV' % sys.argv[0])

# input CSV
infile = str(sys.argv[1])
# output CSV
outfile = str(sys.argv[2])
# amount of rows in input CSV (filled in once the input has been counted)
row_count = 0
# amount of rows processed until now
# NOTE(review): nothing in the visible code updates this counter -- confirm
# whether the elided row processing is supposed to.
rows = 0
###############################################################################
# Serializes output: the worker threads are all handed the same csv writer,
# so only one of them may write at a time.
_write_lock = threading.Lock()


def worker():
    """Thread worker: process queued rows until a ``None`` sentinel arrives.

    Each item taken from the module-level ``queue`` is a dict with two keys:

    * ``'writer'`` -- the csv writer the result is written to
    * ``'row'``    -- one input row to process

    ``queue.task_done()`` is called for *every* item taken off the queue,
    including the sentinel and items whose processing raised. Without that
    guarantee a single failing row leaves the queue's unfinished-task count
    above zero forever and ``queue.join()`` in the main thread never returns.
    """
    while True:
        worker_data = queue.get()
        try:
            # sentinel: no more work for this thread
            if worker_data is None:
                return
            writer = worker_data['writer']
            row = worker_data['row']
            # process row
            # ...
            # build result
            # (placeholder: the actual processing is elided in this snippet)
            result_row = ...
            # write immediately to not lose anything; hold the lock so rows
            # from different threads cannot interleave in the output
            with _write_lock:
                writer.writerows(result_row)
        except Exception:
            # Report the failure but keep this worker alive: a dead worker
            # shrinks the pool, and if all of them die the queue never drains.
            traceback.print_exc()
        finally:
            # always account for the item fetched above
            queue.task_done()
###############################################################################
# initialize queue (unbounded, so put() never blocks the main thread)
queue = Queue.Queue()

# Everything up to and including the final wait happens inside this ``with``:
# the workers write to f_out through the shared writer, so the files must
# stay open until every queued row has been handled.
with open(infile, 'r') as f_in, open(outfile, 'w') as f_out:
    # CSV writer
    writer = csv.writer(f_out, delimiter='|', quotechar='\'')
    # CSV reader
    csv_fields = ['a', 'b', 'c', 'd', 'e']
    r = csv.DictReader(f_in, fieldnames=csv_fields, delimiter='|',
                       quotechar='\'')
    # get amount of input rows (this consumes the reader once)
    row_count = sum(1 for _ in r)
    # seek back to start so the same reader yields every row again
    f_in.seek(0)

    # launch thread pool; daemon threads do not keep the process alive
    for _ in range(max_threads):
        t = threading.Thread(target=worker)
        t.daemon = True
        t.start()

    # walk all rows from input and hand each one, with the writer, to the pool
    # (no per-row sleep: the main thread is running Python code here, so
    # Ctrl+C is already delivered between iterations)
    for row in r:
        queue.put({'writer': writer, 'row': row})

    # Wait for the workers to drain the queue. A bare queue.join() blocks in
    # an untimed wait, which on Python 2 cannot be interrupted by Ctrl+C.
    # Run the join in a helper thread and poll that thread with a timeout so
    # the main thread keeps returning to the interpreter.
    joiner = threading.Thread(target=queue.join)
    joiner.daemon = True
    joiner.start()
    while joiner.is_alive():
        joiner.join(0.1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment