Skip to content

Instantly share code, notes, and snippets.

@Buttonwood
Last active August 29, 2015 13:57
Show Gist options
  • Save Buttonwood/9829031 to your computer and use it in GitHub Desktop.
Save Buttonwood/9829031 to your computer and use it in GitHub Desktop.
Parsing blast default output to a table file.
#!/usr/bin/env python
'''
#=============================================================================
# FileName: parser_m8_solar.py
# Desc: A script for parsering a blast m8 file to solared blocks;
# Author: tanhao
# Email: tanhao2013@gmail.com
# HomePage: http://buttonwood.github.io
# Version: a.0.1
# LastChange: 2014-03-19 09:22:07
# History: 2013-12-27 20:37:43
#=============================================================================
'''
import os
import argparse
def total_score(s = [], t = 1):
if t == 1:
return sum([abs(float(i)) for i in s])
else:
return sum([float(i) for i in s])
def print_list(l=[]):
#return ";".join([repr(i) for i in l])
s = ''
for x in l:
s += x +";"
return s
def print_list_tuple(lt):
# l = [(1, 3), (2, 3), (3, 8), (5, 7), (7, 9)]
# print_list_tuple(l) will be 1,3;2,3;3,8;5,7;7,9;
s = ''
for x,y in lt:
s += repr(x)+","+repr(y)+";"
return s
def get_st_ed_list_tuple(lt):
# l = [(1, 3), (2, 3), (3, 8), (5, 7), (7, 9)]
# get_st_ed_list_tuple(l) will return [1,9]
(a,b) = lt[0]
for x,y in lt:
if a > min(x,y):
a = min(x,y)
if b < max(x,y):
b = max(x,y)
return [a,b]
def main(args):
writer = open(args.output, "w")
query = ""
subject = ""
score = []
iden = []
q_block = []
s_block = []
with open(args.input,'r') as reader:
for line in reader:
fields = line.rstrip("\n").split("\t")
if query == fields[0]:
if subject == fields[1]:
if (int(fields[8]) < int(fields[9])):
score.append("+"+repr(float(fields[-1])))
else:
score.append("-"+repr(float(fields[-1])))
iden.append(fields[2])
q_block.append([int(fields[6]),int(fields[7])])
s_block.append([int(fields[8]),int(fields[9])])
else:
if subject:
# print out
text = ''
a, b = get_st_ed_list_tuple(q_block)
text += query + "\t" + repr(a) + "\t" + repr(b) + "\t"
if total_score(score,0) < 0:
text += "-\t"
else:
text += "+\t"
a, b = get_st_ed_list_tuple(s_block)
text += subject + "\t" + repr(a) + "\t" + repr(b) + "\t"
c = len(iden)
text += repr(c) + "\t" + "%.1f" % (total_score(score)) + "\t"
#print text,
#print "%.2f" % (total_score(iden)/c),
text += "%.2f" % (total_score(iden)/c)
text += "\t" + print_list_tuple(q_block) + "\t" + print_list_tuple(s_block) + "\t"
text += print_list(score) + "\t" + print_list(iden)
#print text
#print "\n"
writer.write(text)
writer.write("\n")
subject = fields[1]
score = []
iden = []
q_block = []
s_block = []
if (int(fields[8]) < int(fields[9])):
score.append("+"+repr(float(fields[-1])))
else:
score.append("-"+repr(float(fields[-1])))
iden.append(fields[2])
q_block.append([int(fields[6]),int(fields[7])])
s_block.append([int(fields[8]),int(fields[9])])
else:
if query:
text = ''
a, b = get_st_ed_list_tuple(q_block)
text += query + "\t" + repr(a) + "\t" + repr(b) + "\t"
if total_score(score,0) < 0:
text += "-\t"
else:
text += "+\t"
a, b = get_st_ed_list_tuple(s_block)
text += subject + "\t" + repr(a) + "\t" + repr(b) + "\t"
c = len(iden)
text += repr(c) + "\t" + "%.1f" % (total_score(score)) + "\t"
#print text,
#print "%.2f" % (total_score(iden)/c),
text += "%.2f" % (total_score(iden)/c)
text += "\t" + print_list_tuple(q_block) + "\t" + print_list_tuple(s_block) + "\t"
text += print_list(score) + "\t" + print_list(iden)
#print text
#print "\n"
writer.write(text)
writer.write("\n")
query = fields[0]
subject = fields[1]
score = []
if (int(fields[8]) < int(fields[9])):
score.append("+"+repr(float(fields[-1])))
else:
score.append("-"+repr(float(fields[-1])))
iden = []
iden.append(fields[2])
q_block = []
q_block.append([int(fields[6]),int(fields[7])])
s_block = []
s_block.append([int(fields[8]),int(fields[9])])
if query:
text = ''
a, b = get_st_ed_list_tuple(q_block)
text += query + "\t" + repr(a) + "\t" + repr(b) + "\t"
if total_score(score,0) < 0:
text += "-\t"
else:
text += "+\t"
a, b = get_st_ed_list_tuple(s_block)
text += subject + "\t" + repr(a) + "\t" + repr(b) + "\t"
c = len(iden)
text += repr(c) + "\t" + "%.1f" % (total_score(score)) + "\t"
#print text,
#print "%.2f" % (total_score(iden)/c),
text += "%.2f" % (total_score(iden)/c)
text += "\t" + print_list_tuple(q_block) + "\t" + print_list_tuple(s_block) + "\t"
text += print_list(score) + "\t" + print_list(iden)
#print text
#print "\n"
writer.write(text)
writer.write("\n")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
description="""
A script for parsering a blast m8 file to solared blocks.
input -> blast.m8:
contig-550123 contig-550123 100.00 991 0 0 1 991 1 991 0.0 1875
contig-550123 contig-5207937 96.70 454 15 0 537 990 1 454 0.0 741
output -> blast.m8.solar:
contig-999996 332 272 332 - contig-5473656 351 291 351 1 121 272,332;
""")
parser.add_argument(
"-i", "--input",
required=True,
help="input a blast m8 file")
parser.add_argument(
"-o", "--output",
required=True,
help="output a solared file")
args = parser.parse_args()
main(args)
#=============================================================================
# FileName: paser_blast_de_hsp.pl
# Desc:
# Author: tanhao
# Email: tanhao2013@gmail.com
# HomePage: http://buttonwood.github.io
# Version: 0.0.1
# LastChange: 2014-03-28 17:40:48
# History:
#=============================================================================
# It may be freely distributed under GNU General Public License.
#
# This script will parse a NCBI blastx output file and output the top
# N hits of each blast search result.
#
# For each hit, the following results are reported:
# Query name, Query length, Query start, Query end, Strand,
# Subject name, Subject length, Subject start, Subject end,
# Bit score, E value, Positives, and Identical
#
# The results are tab-deliminated and ready for import into a spreadsheet
# program for browsing and further analysis.
#=============================================================================
#!/usr/bin/perl -w
use strict;
use Bio::SearchIO;
die ("perl $0 *blast.de\n") unless (@ARGV==1);
my $file = $ARGV[0];
my $in = new Bio::SearchIO(-format => 'blast',
-file => $file);
print "Query_name\tQuery_length\tQuery_start\tQuery_end\t";#Strand\t";
print "Subject_name\tSubject_length\tSubject_start\tSubject_end\t";
print "Hit\tIdentical\tBit_score\tE_value\tConserved\tPositives\n";
while( my $result = $in->next_result ) {
## $result is a Bio::Search::Result::ResultI compliant object
while( my $hit = $result->next_hit ) {
## $hit is a Bio::Search::Hit::HitI compliant object
while( my $hsp = $hit->next_hsp ) {
print $result->query_name,
"\t", $result->query_length,
"\t", $hsp->start('query') .
"\t", $hsp->end('query'),
#"\t", $hsp->strand('query'),
#"\t", $hsp->query->strand,
"\t", $hit->name,
"\t", $hit->length,
"\t", $hsp->start('hit'),
"\t", $hsp->end('hit'),
#"\t", $hsp->strand('hit'),
"\t", $hsp->length('total'),
"\t", $hsp->num_identical,
"\t", $hsp->bits,
"\t", $hsp->significance,
"\t", sprintf("%.2f",($hsp->frac_conserved)*100),
"\t", sprintf("%.2f",($hsp->frac_identical)*100), "\n";
}
}
}
#************* Float like a butterfly! Stand like a buttonwood! *************
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment