Skip to content

Instantly share code, notes, and snippets.

@karthikraman
Created June 8, 2015 08:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karthikraman/93034b697252cb0963e9 to your computer and use it in GitHub Desktop.
Save karthikraman/93034b697252cb0963e9 to your computer and use it in GitHub Desktop.
Parse BLAST XML output, with an e-value cutoff
#!/usr/bin/python
import sys
from optparse import OptionParser
usage="Usage: %prog -f <filename> -p <positives> -l <length>"
parser = OptionParser(usage)
parser.add_option("-f", "--file", dest="filename", help="write report to FILE")
parser.add_option("-p", "--positives", type="float", dest="pos", help="% positives")
parser.add_option("-l", "--length", type="float", dest="len", help="% length")
import re
(options,args)=parser.parse_args()
if 0: #check here in future for errors!!
print
else:
from Bio.Blast import NCBIXML
blast_file=open(options.filename,'r')
blast_records = NCBIXML.parse(blast_file)
length = options.len
pos = options.pos
nQuery = 0
E_VALUE_THRESH = 0.0001
for blast_record in blast_records:
nQuery=nQuery+1
nHits=0
print nQuery, "\t", re.sub(' .*','',blast_record.query), "\t",
for alignment in blast_record.alignments:
for hsp in alignment.hsps:
if hsp.expect < E_VALUE_THRESH:
if hsp.align_length*100/blast_record.query_letters>=length:
if hsp.positives*100/hsp.align_length>=pos:
length_percent=hsp.align_length*100/blast_record.query_letters
pos_percent=hsp.positives*100/hsp.align_length
print '%s:%e:L=%d:P=%d' % (alignment.title,hsp.expect,length_percent,pos_percent),
nHits=nHits+1
print "\t%d" % nHits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment