Usage:
./get-kmers-for-hashes.py data/GCF_000005845.2_ASM584v2_genomic.fna.gz 31 9061051479453
#! /usr/bin/env python | |
import sys | |
import screed | |
import sourmash_lib | |
from sourmash_lib._minhash import hash_murmur | |
genome_file = sys.argv[1] | |
ksize = int(sys.argv[2]) | |
hashvals = set([ int(x) for x in sys.argv[3:] ]) | |
def kmers(seq): | |
for i in range(0, len(seq) - ksize + 1): | |
yield seq[i:i+ksize] | |
for record in screed.open(genome_file): | |
for kmer in kmers(record.sequence.upper()): | |
if hash_murmur(kmer) in hashvals: | |
print(kmer, hash_murmur(kmer)) |