Last active
October 3, 2020 08:11
-
-
Save zed/f199d5a0c453be2e9681 to your computer and use it in GitHub Desktop.
Fastest way to sum integers in a file. Python `ctypes` multithreaded version.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** To make shared library, run: | |
$ gcc -shared -O3 -o libcsum_block.so -fPIC csum_block.c | |
Algorithm from: | |
http://stackoverflow.com/questions/25606833/fastest-way-to-sum-integers-in-text-file/25607155#comment40046670_25607155 | |
*/ | |
#include <stdint.h> | |
/** Return sum(map(int, block.split())) */ | |
uint64_t | |
csum_block(const char* block, uint32_t length) { | |
uint64_t total = 0; | |
uint32_t partial = 0; | |
const char* p = block; | |
for (; p != &block[length]; ++p) { | |
if ('0' <= *p && *p <= '9') { | |
partial = 10*partial + (*p - '0'); | |
} | |
else if (partial) { /* non-digit found; end number */ | |
total += partial; /* add the parsed number */ | |
partial = 0; /* start new number */ | |
} | |
} | |
return total + partial; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Fastest way to sum integers in a file. | |
Python `ctypes` multithreaded version. | |
1e9 newline-separated (nonnegative) integers in range [0, 1e9) each, see | |
http://stackoverflow.com/questions/25606833/fastest-way-to-sum-integers-in-text-file/ | |
To measure time performance, run: | |
$ /usr/bin/time python3 sum1Gints-ctypes.py --parallel /tmp/1G-int-per-line | |
See csum_block.c on how to create libcsum_block.so | |
Algorithm: | |
- read ~10M newline-aligned blocks in the main thread in Python (sequential read) | |
- distribute blocks to the thread pool | |
- for each block: call C function, to sum integers | |
- sum intermediate results and print the total result in the main thread | |
""" | |
from ctypes import * | |
from argparse import ArgumentParser | |
from multiprocessing.pool import ThreadPool | |
# declare C function to be called from the shared library | |
csum_block = cdll.LoadLibrary('./libcsum_block.so').csum_block | |
csum_block.argtypes = [c_void_p, c_uint32] | |
csum_block.restype = c_uint64 | |
def sum_block(block): | |
return csum_block(block, len(block)) | |
def aligned_blocks(filename, bufsize): | |
"""Yield newline-aligned blocks ~bufsize size.""" | |
with open(filename, 'rb', bufsize) as file: | |
while True: | |
chunk = file.read(bufsize) | |
if not chunk: | |
break | |
yield chunk + file.readline() | |
# parse command-line | |
parser = ArgumentParser() | |
parser.add_argument('--parallel', action='store_true') | |
parser.add_argument('filename') | |
args = parser.parse_args() | |
# find sum of all integers in a file specified at the command-line | |
if args.parallel: | |
map = ThreadPool().imap_unordered # concurrent | |
print(sum(map(sum_block, aligned_blocks(args.filename, bufsize=1<<23)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment