Skip to content

Instantly share code, notes, and snippets.

@jni
Last active September 28, 2015 08:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jni/f165ae2aea45bf900c95 to your computer and use it in GitHub Desktop.
Save jni/f165ae2aea45bf900c95 to your computer and use it in GitHub Desktop.
Throughput of simple streaming of text data with Toolz/CyToolz
from IPython import get_ipython
import toolz as tz
from toolz import curried as c
# Path to the FASTA file that every benchmark in this script streams through.
fn = 'data/mb1_dm6.fa'
# NOTE(review): the "MB/s" figures below are computed as 1 / best-time, which
# is a true MB/s only if the file is 1 MB in size -- confirm against the data.
# This script must run inside IPython: the timings use the %timeit magic.

# Baseline: iterate over the file line by line, keeping only the last line.
# `run_line_magic` replaces the deprecated `InteractiveShell.magic`; `-o`
# makes %timeit return its result object and `-q` silences its own printout.
t = get_ipython().run_line_magic(
    'timeit', '-o -q tz.pipe(fn, open, tz.last)')
print('Raw throughput (lines): %.2fMB/s' % (1 / t.best))

# Same stream, but flattened into individual characters before `tz.last`.
t = get_ipython().run_line_magic(
    'timeit', '-o -q tz.pipe(fn, open, tz.concat, tz.last)')
print('Single character throughput: %.2fMB/s' % (1 / t.best))
def is_sequence(line):
    """Return True if *line* is a sequence line rather than a header or blank.

    A line counts as sequence data when it is longer than one character
    (so an empty string or a bare newline is rejected) and does not start
    with ``'>'``, the marker this script treats as a header line.
    """
    return len(line) > 1 and not line.startswith('>')
# Characters accepted as nucleotides (upper and lower case); built once at
# import time so each membership test is a single set lookup.
nucleotides = set('ACGTacgt')


def is_nucleotide(char):
    """Return True if *char* is one of the characters in ``nucleotides``."""
    return char in nucleotides
# Full pipeline: keep only the lines accepted by `is_sequence`, flatten them
# into characters, keep only the characters accepted by `is_nucleotide`, and
# take the last one. `run_line_magic` replaces the deprecated
# `InteractiveShell.magic`; `-o` makes %timeit return its result object and
# `-q` silences its own printout.
t = get_ipython().run_line_magic(
    'timeit',
    '-o -q tz.pipe(fn, open, c.filter(is_sequence), tz.concat, '
    'c.filter(is_nucleotide), tz.last)')
# NOTE(review): 1 / t.best is MB/s only if the input file is 1 MB -- confirm.
print('Filtered throughput: %.2fMB/s' % (1 / t.best))
# Cython; spoiler alert: doesn't help
import cytoolz as ctz
from cytoolz import curried as cc
# Repeat the three benchmarks with the cytoolz equivalents of the toolz
# functions. `run_line_magic` replaces the deprecated
# `InteractiveShell.magic`; `-o` makes %timeit return its result object and
# `-q` silences its own printout.
# NOTE(review): 1 / t.best is MB/s only if the input file is 1 MB -- confirm.

# Line-by-line iteration, keeping only the last line.
t = get_ipython().run_line_magic(
    'timeit', '-o -q ctz.pipe(fn, open, ctz.last)')
print('Cython raw throughput (lines): %.2fMB/s' % (1 / t.best))

# Lines flattened into individual characters.
t = get_ipython().run_line_magic(
    'timeit', '-o -q ctz.pipe(fn, open, ctz.concat, ctz.last)')
print('Cython single character throughput: %.2fMB/s' % (1 / t.best))

# Lines filtered by `is_sequence`, flattened, then filtered by `is_nucleotide`.
t = get_ipython().run_line_magic(
    'timeit',
    '-o -q ctz.pipe(fn, open, cc.filter(is_sequence), ctz.concat, '
    'cc.filter(is_nucleotide), ctz.last)')
print('Cython filtered throughput: %.2fMB/s' % (1 / t.best))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment