Skip to content

Instantly share code, notes, and snippets.

@rob-p
Created September 28, 2021 21:17
Show Gist options
  • Save rob-p/74087f1335de806a26144cf18a9d8aca to your computer and use it in GitHub Desktop.
Save rob-p/74087f1335de806a26144cf18a9d8aca to your computer and use it in GitHub Desktop.
filter containments out of a PAF file
from pafpy import PafFile
import argparse
import sys
def main(argsj):
pi = args.paf
of = args.tblast
def frac_covered(r):
return float(r.qend - r.qstart) / r.qlen
ofile = open(of, 'w')
ofile.write('qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n')
with PafFile(pi) as paf:
for record in paf:
if record.qname != record.tname:
if record.qlen >= args.minlen:
if (frac_covered(record) >= args.fract) and (record.blast_identity() >= args.ident):
ofile.write(f"{record.qname}\t{record.tname}\t{record.blast_identity()}\t*\t*\t*\t*\t*\t*\t*\t*\t*\n")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Filter minimap2 output to find high-quality containment.')
parser.add_argument('--paf', help='input PAF file')
parser.add_argument('--tblast', help='output tab-delimited BLAST file')
parser.add_argument('--minlen', default=500, type=int, help='minimum contig length to consider')
parser.add_argument('--ident', default=0.95, type=float, help='required percent identity')
parser.add_argument('--fract', default=0.95, type=float, help='fraction of the contained contig that must be covered by the match')
args = parser.parse_args()
main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment