Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@sinelaw
Last active July 2, 2019 18:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sinelaw/8e53d4d48479cfd8b71b09fbba3b1f57 to your computer and use it in GitHub Desktop.
Save sinelaw/8e53d4d48479cfd8b71b09fbba3b1f57 to your computer and use it in GitHub Desktop.
Parallel copy of a single file
import threading
import os
import sys
# Default number of bytes handed to a single read/write call (1 MiB);
# used as the default ``block_size`` of ``main``.
BLOCK_SIZE = 1 << 20
# Default number of worker threads; used as the default ``threads_count``
# of ``main``.
THREADS = 16
def write_range(source, target, start, amount, block_size):
    """Copy ``amount`` bytes from ``source`` into ``target`` at offset ``start``.

    Both files are opened independently by this call, so several calls can
    run concurrently on disjoint ranges of the same pair of files.

    Args:
        source: Path of the file to read from.
        target: Path of the file to write to. It must already exist, because
            it is opened for in-place update and is never truncated.
        start: Byte offset, used for both files, where the range begins.
        amount: Number of bytes to copy starting at ``start``.
        block_size: Maximum number of bytes moved per read/write call.

    Raises:
        ValueError: If ``block_size`` is not positive.
        IOError/OSError: If either file cannot be opened, read, or written.
    """
    if block_size <= 0:
        raise ValueError('block_size must be a positive number of bytes')
    # Binary mode keeps the bytes untouched: text mode would translate
    # newlines on some platforms and attempt to decode the data on Python 3.
    with open(source, 'rb') as source_file, open(target, 'r+b') as target_file:
        source_file.seek(start)
        target_file.seek(start)
        remaining = amount
        while remaining > 0:
            chunk = source_file.read(min(block_size, remaining))
            if not chunk:
                # The source ended before ``amount`` bytes were available.
                break
            target_file.write(chunk)
            # Count what was actually read so a short read is not lost.
            remaining -= len(chunk)
def main(source, target, threads_count=THREADS, block_size=BLOCK_SIZE):
    """Copy ``source`` to ``target`` using several threads in parallel.

    The source is split into ``threads_count`` contiguous byte ranges and one
    thread per range runs ``write_range``. The call returns once every thread
    has finished.

    Args:
        source: Path of the file to copy.
        target: Path of the destination. It is created if missing; an
            existing file is overwritten in place and is not truncated.
        threads_count: Number of worker threads (and byte ranges).
        block_size: Maximum number of bytes per read/write call in a worker.

    Raises:
        ValueError: If ``threads_count`` is less than 1.
        OSError: If ``source`` cannot be stat'ed or ``target`` cannot be
            created.
    """
    if threads_count < 1:
        raise ValueError('threads_count must be at least 1')
    file_size = os.stat(source).st_size
    print('{} => {}, using {} threads, block size {}'.format(source, target, threads_count, block_size))
    # Make sure the target exists: workers open it for in-place update, which
    # fails on a missing file. Append mode creates without truncating.
    with open(target, 'a'):
        pass
    threads = []
    for i in range(threads_count):
        # Range i spans [floor(i*size/n), floor((i+1)*size/n)). Consecutive
        # ranges share a boundary, so together they cover every byte exactly
        # once, including the remainder when size is not a multiple of n.
        start = i * file_size // threads_count
        end = (i + 1) * file_size // threads_count
        args = (source, target, start, end - start, block_size)
        t = threading.Thread(target=write_range, args=args)
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
if __name__ == '__main__':
    # Every command-line argument is forwarded positionally (main expects the
    # source path followed by the target path); the thread count and block
    # size are pinned to the module-level defaults.
    cli_args = sys.argv[1:]
    main(*cli_args, threads_count=THREADS, block_size=BLOCK_SIZE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment