with-bcolz | time (seconds) | bcolz-time | md5sum | num_lines | filter |
---|---|---|---|---|---|
True | 2.8 | 0.62 | 5281fecb8571cc7c8e832e5682e865bb | 16778 | (gt_phred_ll_homalt).(*).(==0).(all) |
False | 155.2 | NA | 5281fecb8571cc7c8e832e5682e865bb | 16778 | (gt_phred_ll_homalt).(*).(==0).(all) |
True | 3.1 | 0.74 | b2b75c9a56e128410a0e19708f096e2f | 16750 | ((gt_phred_ll_homalt).().(==0).(all)) and (gt_types).().(==HOM_ALT).(all) |
False | 212.8 | NA | b2b75c9a56e128410a0e19708f096e2f | 16750 | ((gt_phred_ll_homalt).().(==0).(all)) and (gt_types).().(==HOM_ALT).(all) |
True | 1.4 | 0.42 | c4885f35d03bed862d1e9d0ece71bc7c | 2367 | (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF) |
False | 203.0 | NA | c4885f35d03bed862d1e9d0ece71bc7c | 2367 | (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF) |
True | 10.0 | 0.53 | 01231bd39615a85b2e4a6ffc6a53a7b9 | 96824 | (gt_ref_depths).(phenotype==1).(>=20).(all) |
False | 157.6 | NA | 01231bd39615a85b2e4a6ffc6a53a7b9 | 96824 | (gt_ref_depths).(phenotype==1).(>=20).(all) |
with-bcolz | time (seconds) | bcolz-time | md5sum | lines | filter |
version 0.11 | 2115.9 | NA | c4885f35d03bed862d1e9d0ece71bc7c | 2367 | (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF) |
version 0.11 | 1984.7 | NA | 01231bd39615a85b2e4a6ffc6a53a7b9 | 96824 | (gt_ref_depths).(phenotype==1).(>=20).(all) |
import toolshed as ts | |
import time | |
import sys | |
import hashlib | |
db = sys.argv[1] | |
kid = sys.argv[2] if len(sys.argv) > 2 else "SMS173" | |
mom = sys.argv[3] if len(sys.argv) > 2 else "SMS238" | |
dad = sys.argv[4] if len(sys.argv) > 2 else "SMS239" | |
# use command-line to make sure we are getting the same version. | |
version = next(ts.nopen('|gemini --version 2>&1')).split()[1] | |
version = tuple(map(int, version.split("."))) | |
filters = ( | |
'(gt_phred_ll_homalt).(*).(==0).(all)', | |
'((gt_phred_ll_homalt).(*).(==0).(all)) and (gt_types).(*).(==HOM_ALT).(all)', | |
'(gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF)', | |
'(gt_ref_depths).(phenotype==1).(>=20).(all)' | |
) | |
def md5(s): | |
return hashlib.md5(s).hexdigest() | |
print "\t".join(["with-bcolz", "time (seconds)", "bcolz-time", "md5sum", "lines", "filter"]) | |
query = "select chrom, start, end from variants" | |
for filter in filters: | |
if version < (0, 12, 0) and "phred_ll" in filter: | |
continue | |
for bc in (True, False): | |
if bc and version < (0, 14, 0): continue | |
cmd = "gemini query -q '{query}' --gt-filter '{filter}' {db}".format(filter=filter.format(**locals()), query=query, db=db) | |
if bc: | |
cmd += " --use-bcolz" | |
t0 = time.time() | |
p1 = ts.nopen("|%s > aa" % cmd, mode=None) | |
list(p1.stdout) | |
se = p1.stderr.read().split("\n") | |
p1.wait() | |
if bc: | |
try: | |
bctime = [x.split()[1] for x in se if "seconds" in x][0] | |
except: | |
print se | |
else: | |
bctime = "NA" | |
t = "%.1f" % (time.time() - t0) | |
lines = sum(1 for _ in open("aa")) | |
r = [bc, t, bctime, md5(open("aa").read()), lines, filter] | |
print "\t".join(map(str, r)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment