Skip to content

Instantly share code, notes, and snippets.

@betatim

betatim/fp.py Secret

Created August 25, 2017 14:48
Show Gist options
  • Save betatim/4365c8cffc57f6b911a7c8c8204e7ebe to your computer and use it in GitHub Desktop.
Save betatim/4365c8cffc57f6b911a7c8c8204e7ebe to your computer and use it in GitHub Desktop.
import os
import time
import random
import resource
import khmer
# Benchmark settings.
k = 23        # length of each generated k-mer; also the first argument to the khmer tables
n_slots = 27  # the khmer tables are constructed with 2**n_slots as their size argument
# Fixed seed so the random module produces the same sequence on every run.
random.seed(1234)
# Build the list of k-mers used by the benchmark, or load it from the
# "kmers.txt" cache (one k-mer per line) when that file already exists.
kmers = []
if not os.path.exists("kmers.txt"):
    # Draw random k-mers and keep only the first occurrence of each one.
    # De-duplicating through a side set (instead of list(set(...))) keeps the
    # list in generation order. Iterating a set of strings gives an order that
    # can change from one interpreter run to the next when string hash
    # randomization is active, which would make the cached file -- and the
    # k-mers that end up inserted vs. queried -- differ between fresh runs
    # even with a fixed random seed.
    choice_ = random.choice  # bound once; called k times per k-mer
    seen = set()
    for n in range(10000000):
        kmer = ''.join([choice_(['A', 'C', 'G', 'T']) for _ in range(k)])
        if kmer not in seen:
            seen.add(kmer)
            kmers.append(kmer)
    del seen  # only needed while de-duplicating
    # Cache the result so later runs can skip the generation step.
    with open("kmers.txt", "w") as f:
        f.writelines(kmer + "\n" for kmer in kmers)
else:
    with open("kmers.txt") as f:
        for line in f:
            kmers.append(line.strip())
def speed(kind='bf', size=9000000, query_size=1000000):
    """Time k-mer insertion and lookup on a khmer count table.

    A ``khmer.Counttable`` is created when *kind* is ``'bf'``; any other
    value creates a ``khmer.QFCounttable``. Three phases are then timed with
    ``time.time()``:

    1. adding the first *size* entries of the module-level ``kmers`` list;
    2. looking up the last *query_size* entries of ``kmers`` -- meant to be
       absent from the table, which holds only when ``kmers`` contains at
       least ``size + query_size`` distinct entries;
    3. looking up the first *query_size* entries of ``kmers`` in shuffled
       order -- present in the table when ``query_size <= size``.

    The copy and shuffle for phase 3 are excluded from the timings. A header
    line and one comma-separated result line are printed; nothing is
    returned.
    """
    table_size = 2**n_slots
    table = (khmer.Counttable(k, table_size, 2) if kind == 'bf'
             else khmer.QFCounttable(k, table_size))

    # Phase 1: load the table.
    t_load_start = time.time()
    for idx in range(size):
        table.add(kmers[idx])
    t_load_end = time.time()

    # Phase 2: query from the tail of the list (k-mers that were not added).
    for idx in range(query_size):
        table.get(kmers[-idx-1])
    t_absent_end = time.time()

    # Shuffle a copy of the head of the list; done outside the timed region.
    present = kmers[:query_size]
    random.shuffle(present)
    t_present_start = time.time()

    # Phase 3: query k-mers that were added.
    for idx in range(query_size):
        table.get(present[idx])
    t_present_end = time.time()

    load_secs = t_load_end - t_load_start
    absent_secs = t_absent_end - t_load_end
    present_secs = t_present_end - t_present_start

    print("kind, load, query, t_load, t_queryNP, t_queryP")
    print("{}, {}, {}, {}, {}, {}".format(kind, size, query_size,
                                          load_secs, absent_secs,
                                          present_secs))
if __name__ == "__main__":
    import sys

    # The optional first command-line argument selects the table kind. When
    # it is omitted, speed() uses its own default instead of the script
    # failing with an IndexError on sys.argv[1].
    speed(*sys.argv[1:2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment