Last active
August 29, 2015 13:57
-
-
Save Buttonwood/9829031 to your computer and use it in GitHub Desktop.
Parsing blast default output to a table file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
''' | |
#============================================================================= | |
# FileName: parser_m8_solar.py | |
# Desc: A script for parsering a blast m8 file to solared blocks; | |
# Author: tanhao | |
# Email: tanhao2013@gmail.com | |
# HomePage: http://buttonwood.github.io | |
# Version: a.0.1 | |
# LastChange: 2014-03-19 09:22:07 | |
# History: 2013-12-27 20:37:43 | |
#============================================================================= | |
''' | |
import os | |
import argparse | |
def total_score(s = [], t = 1): | |
if t == 1: | |
return sum([abs(float(i)) for i in s]) | |
else: | |
return sum([float(i) for i in s]) | |
def print_list(l=[]): | |
#return ";".join([repr(i) for i in l]) | |
s = '' | |
for x in l: | |
s += x +";" | |
return s | |
def print_list_tuple(lt): | |
# l = [(1, 3), (2, 3), (3, 8), (5, 7), (7, 9)] | |
# print_list_tuple(l) will be 1,3;2,3;3,8;5,7;7,9; | |
s = '' | |
for x,y in lt: | |
s += repr(x)+","+repr(y)+";" | |
return s | |
def get_st_ed_list_tuple(lt): | |
# l = [(1, 3), (2, 3), (3, 8), (5, 7), (7, 9)] | |
# get_st_ed_list_tuple(l) will return [1,9] | |
(a,b) = lt[0] | |
for x,y in lt: | |
if a > min(x,y): | |
a = min(x,y) | |
if b < max(x,y): | |
b = max(x,y) | |
return [a,b] | |
def main(args): | |
writer = open(args.output, "w") | |
query = "" | |
subject = "" | |
score = [] | |
iden = [] | |
q_block = [] | |
s_block = [] | |
with open(args.input,'r') as reader: | |
for line in reader: | |
fields = line.rstrip("\n").split("\t") | |
if query == fields[0]: | |
if subject == fields[1]: | |
if (int(fields[8]) < int(fields[9])): | |
score.append("+"+repr(float(fields[-1]))) | |
else: | |
score.append("-"+repr(float(fields[-1]))) | |
iden.append(fields[2]) | |
q_block.append([int(fields[6]),int(fields[7])]) | |
s_block.append([int(fields[8]),int(fields[9])]) | |
else: | |
if subject: | |
# print out | |
text = '' | |
a, b = get_st_ed_list_tuple(q_block) | |
text += query + "\t" + repr(a) + "\t" + repr(b) + "\t" | |
if total_score(score,0) < 0: | |
text += "-\t" | |
else: | |
text += "+\t" | |
a, b = get_st_ed_list_tuple(s_block) | |
text += subject + "\t" + repr(a) + "\t" + repr(b) + "\t" | |
c = len(iden) | |
text += repr(c) + "\t" + "%.1f" % (total_score(score)) + "\t" | |
#print text, | |
#print "%.2f" % (total_score(iden)/c), | |
text += "%.2f" % (total_score(iden)/c) | |
text += "\t" + print_list_tuple(q_block) + "\t" + print_list_tuple(s_block) + "\t" | |
text += print_list(score) + "\t" + print_list(iden) | |
#print text | |
#print "\n" | |
writer.write(text) | |
writer.write("\n") | |
subject = fields[1] | |
score = [] | |
iden = [] | |
q_block = [] | |
s_block = [] | |
if (int(fields[8]) < int(fields[9])): | |
score.append("+"+repr(float(fields[-1]))) | |
else: | |
score.append("-"+repr(float(fields[-1]))) | |
iden.append(fields[2]) | |
q_block.append([int(fields[6]),int(fields[7])]) | |
s_block.append([int(fields[8]),int(fields[9])]) | |
else: | |
if query: | |
text = '' | |
a, b = get_st_ed_list_tuple(q_block) | |
text += query + "\t" + repr(a) + "\t" + repr(b) + "\t" | |
if total_score(score,0) < 0: | |
text += "-\t" | |
else: | |
text += "+\t" | |
a, b = get_st_ed_list_tuple(s_block) | |
text += subject + "\t" + repr(a) + "\t" + repr(b) + "\t" | |
c = len(iden) | |
text += repr(c) + "\t" + "%.1f" % (total_score(score)) + "\t" | |
#print text, | |
#print "%.2f" % (total_score(iden)/c), | |
text += "%.2f" % (total_score(iden)/c) | |
text += "\t" + print_list_tuple(q_block) + "\t" + print_list_tuple(s_block) + "\t" | |
text += print_list(score) + "\t" + print_list(iden) | |
#print text | |
#print "\n" | |
writer.write(text) | |
writer.write("\n") | |
query = fields[0] | |
subject = fields[1] | |
score = [] | |
if (int(fields[8]) < int(fields[9])): | |
score.append("+"+repr(float(fields[-1]))) | |
else: | |
score.append("-"+repr(float(fields[-1]))) | |
iden = [] | |
iden.append(fields[2]) | |
q_block = [] | |
q_block.append([int(fields[6]),int(fields[7])]) | |
s_block = [] | |
s_block.append([int(fields[8]),int(fields[9])]) | |
if query: | |
text = '' | |
a, b = get_st_ed_list_tuple(q_block) | |
text += query + "\t" + repr(a) + "\t" + repr(b) + "\t" | |
if total_score(score,0) < 0: | |
text += "-\t" | |
else: | |
text += "+\t" | |
a, b = get_st_ed_list_tuple(s_block) | |
text += subject + "\t" + repr(a) + "\t" + repr(b) + "\t" | |
c = len(iden) | |
text += repr(c) + "\t" + "%.1f" % (total_score(score)) + "\t" | |
#print text, | |
#print "%.2f" % (total_score(iden)/c), | |
text += "%.2f" % (total_score(iden)/c) | |
text += "\t" + print_list_tuple(q_block) + "\t" + print_list_tuple(s_block) + "\t" | |
text += print_list(score) + "\t" + print_list(iden) | |
#print text | |
#print "\n" | |
writer.write(text) | |
writer.write("\n") | |
if __name__ == "__main__": | |
parser = argparse.ArgumentParser( | |
formatter_class=argparse.RawTextHelpFormatter, | |
description=""" | |
A script for parsering a blast m8 file to solared blocks. | |
input -> blast.m8: | |
contig-550123 contig-550123 100.00 991 0 0 1 991 1 991 0.0 1875 | |
contig-550123 contig-5207937 96.70 454 15 0 537 990 1 454 0.0 741 | |
output -> blast.m8.solar: | |
contig-999996 332 272 332 - contig-5473656 351 291 351 1 121 272,332; | |
""") | |
parser.add_argument( | |
"-i", "--input", | |
required=True, | |
help="input a blast m8 file") | |
parser.add_argument( | |
"-o", "--output", | |
required=True, | |
help="output a solared file") | |
args = parser.parse_args() | |
main(args) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#============================================================================= | |
# FileName: paser_blast_de_hsp.pl | |
# Desc: | |
# Author: tanhao | |
# Email: tanhao2013@gmail.com | |
# HomePage: http://buttonwood.github.io | |
# Version: 0.0.1 | |
# LastChange: 2014-03-28 17:40:48 | |
# History: | |
#============================================================================= | |
# It may be freely distributed under GNU General Public License. | |
# | |
# This script will parse a NCBI blastx output file and output the top | |
# N hits of each blast search result. | |
# | |
# For each hit, the following results are reported: | |
# Query name, Query length, Query start, Query end, Strand, | |
# Subject name, Subject length, Subject start, Subject end, | |
# Bit score, E value, Positives, and Identical | |
# | |
# The results are tab-deliminated and ready for import into a spreadsheet | |
# program for browsing and further analysis. | |
#============================================================================= | |
#!/usr/bin/perl -w | |
use strict; | |
use Bio::SearchIO; | |
die ("perl $0 *blast.de\n") unless (@ARGV==1); | |
my $file = $ARGV[0]; | |
my $in = new Bio::SearchIO(-format => 'blast', | |
-file => $file); | |
print "Query_name\tQuery_length\tQuery_start\tQuery_end\t";#Strand\t"; | |
print "Subject_name\tSubject_length\tSubject_start\tSubject_end\t"; | |
print "Hit\tIdentical\tBit_score\tE_value\tConserved\tPositives\n"; | |
while( my $result = $in->next_result ) { | |
## $result is a Bio::Search::Result::ResultI compliant object | |
while( my $hit = $result->next_hit ) { | |
## $hit is a Bio::Search::Hit::HitI compliant object | |
while( my $hsp = $hit->next_hsp ) { | |
print $result->query_name, | |
"\t", $result->query_length, | |
"\t", $hsp->start('query') . | |
"\t", $hsp->end('query'), | |
#"\t", $hsp->strand('query'), | |
#"\t", $hsp->query->strand, | |
"\t", $hit->name, | |
"\t", $hit->length, | |
"\t", $hsp->start('hit'), | |
"\t", $hsp->end('hit'), | |
#"\t", $hsp->strand('hit'), | |
"\t", $hsp->length('total'), | |
"\t", $hsp->num_identical, | |
"\t", $hsp->bits, | |
"\t", $hsp->significance, | |
"\t", sprintf("%.2f",($hsp->frac_conserved)*100), | |
"\t", sprintf("%.2f",($hsp->frac_identical)*100), "\n"; | |
} | |
} | |
} | |
#************* Float like a butterfly! Stand like a buttonwood! ************* |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment