Instantly share code, notes, and snippets.

@brentp /speed.md
Last active Aug 29, 2015

Embed
What would you like to do?
with-bcolz time (seconds) bcolz-time md5sum num_lines filter
True 2.8 0.62 5281fecb8571cc7c8e832e5682e865bb 16778 (gt_phred_ll_homalt).(*).(==0).(all)
False 155.2 NA 5281fecb8571cc7c8e832e5682e865bb 16778 (gt_phred_ll_homalt).(*).(==0).(all)
True 3.1 0.74 b2b75c9a56e128410a0e19708f096e2f 16750 ((gt_phred_ll_homalt).().(==0).(all)) and (gt_types).().(==HOM_ALT).(all)
False 212.8 NA b2b75c9a56e128410a0e19708f096e2f 16750 ((gt_phred_ll_homalt).().(==0).(all)) and (gt_types).().(==HOM_ALT).(all)
True 1.4 0.42 c4885f35d03bed862d1e9d0ece71bc7c 2367 (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF)
False 203.0 NA c4885f35d03bed862d1e9d0ece71bc7c 2367 (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF)
True 10.0 0.53 01231bd39615a85b2e4a6ffc6a53a7b9 96824 (gt_ref_depths).(phenotype==1).(>=20).(all)
False 157.6 NA 01231bd39615a85b2e4a6ffc6a53a7b9 96824 (gt_ref_depths).(phenotype==1).(>=20).(all)
with-bcolz time (seconds) bcolz-time md5sum lines filter
version 0.11 2115.9 NA c4885f35d03bed862d1e9d0ece71bc7c 2367 (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF)
version 0.11 1984.7 NA 01231bd39615a85b2e4a6ffc6a53a7b9 96824 (gt_ref_depths).(phenotype==1).(>=20).(all)
import toolshed as ts
import time
import sys
import hashlib
db = sys.argv[1]
kid = sys.argv[2] if len(sys.argv) > 2 else "SMS173"
mom = sys.argv[3] if len(sys.argv) > 2 else "SMS238"
dad = sys.argv[4] if len(sys.argv) > 2 else "SMS239"
# use command-line to make sure we are getting the same version.
version = next(ts.nopen('|gemini --version 2>&1')).split()[1]
version = tuple(map(int, version.split(".")))
filters = (
'(gt_phred_ll_homalt).(*).(==0).(all)',
'((gt_phred_ll_homalt).(*).(==0).(all)) and (gt_types).(*).(==HOM_ALT).(all)',
'(gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF)',
'(gt_ref_depths).(phenotype==1).(>=20).(all)'
)
def md5(s):
return hashlib.md5(s).hexdigest()
print "\t".join(["with-bcolz", "time (seconds)", "bcolz-time", "md5sum", "lines", "filter"])
query = "select chrom, start, end from variants"
for filter in filters:
if version < (0, 12, 0) and "phred_ll" in filter:
continue
for bc in (True, False):
if bc and version < (0, 14, 0): continue
cmd = "gemini query -q '{query}' --gt-filter '{filter}' {db}".format(filter=filter.format(**locals()), query=query, db=db)
if bc:
cmd += " --use-bcolz"
t0 = time.time()
p1 = ts.nopen("|%s > aa" % cmd, mode=None)
list(p1.stdout)
se = p1.stderr.read().split("\n")
p1.wait()
if bc:
try:
bctime = [x.split()[1] for x in se if "seconds" in x][0]
except:
print se
else:
bctime = "NA"
t = "%.1f" % (time.time() - t0)
lines = sum(1 for _ in open("aa"))
r = [bc, t, bctime, md5(open("aa").read()), lines, filter]
print "\t".join(map(str, r))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment