Last active
April 21, 2020 06:49
-
-
Save piroyon/2e6c3f308eb62c0a783b7ad45c6cc007 to your computer and use it in GitHub Desktop.
Format the result of diamond (vs.nr) xml file for blast2go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
=head1 SCRIPT NAME | |
diamond2b2g.pl | |
=head1 DESCRIPTION | |
Format the result of diamond (vs.nr) xml file for blast2go | |
=head1 USAGE | |
0. Run diamond | |
$ diamond blastx --db nr --out result.xml --query query.fa --outfmt 5 | |
1. Make GI and accession list | |
$ blastdbcmd -db nr -entry all -outfmt "%g %i" > gilist | |
$ head gilist | |
489223532 ref|WP_003131952.1| | |
15674171 ref|NP_268346.1| | |
13878750 sp|Q9CDN0.1|RS18_LACLA | |
2. Run | |
$ perl diamond2b2g.pl gilist result.xml > result_forb2g.xml | |
=cut | |
#!/usr/bin/perl
# Rewrite the <Hit_id> elements of a DIAMOND XML report (--outfmt 5) into the
# "gi|GI|DB|ACCESSION|" form, using a GI/accession list such as the one
# produced by:  blastdbcmd -db nr -entry all -outfmt "%g %i"
#
# Arguments:
#   $ARGV[0]  GI list; each line is "<gi> <db>|<accession>..." separated by
#             whitespace (e.g. "489223532 ref|WP_003131952.1|").
#   $ARGV[1]  DIAMOND XML report.
#
# Output (STDOUT):
#   The XML report, with every line containing a <Hit_id> whose accession is
#   present in the GI list replaced by "<Hit_id>gi|GI|DB|ACC|</Hit_id>".
#   All other lines are copied through unchanged. A <Hit_id> whose accession
#   is not in the GI list is also copied through unchanged, and a warning is
#   written to STDERR, rather than emitting a malformed "gi|||ACC|" id.
#
# Dies with a message if an argument is missing or a file cannot be opened.
use strict;
use warnings;

die "Usage: $0 gilist result.xml > result_forb2g.xml\n" unless @ARGV >= 2;
my ($gilist, $diamondxml) = @ARGV;

# Maps accession => [GI, database tag].
my %gi_db_for;

open my $gi_fh, '<', $gilist
    or die "Cannot open GI list '$gilist': $!\n";
while (my $line = <$gi_fh>) {
    my ($gi, $id) = split ' ', $line;
    next unless defined $id;

    # Skip lines whose identifier does not have the "db|accession" shape.
    # Without this check the capture variables would still hold the values
    # from the previous matching line, and that accession's GI would be
    # overwritten with the GI of the current line.
    next unless $id =~ /^(\w+)\|([\w\.]+)/;
    my ($db, $acc) = ($1, $2);

    $gi_db_for{$acc} = [$gi, $db];
}
close $gi_fh;

open my $xml_fh, '<', $diamondxml
    or die "Cannot open DIAMOND XML '$diamondxml': $!\n";
while (my $line = <$xml_fh>) {
    if ($line =~ /<Hit_id>([\w\.\-]+)</) {
        my $acc   = $1;
        my $entry = $gi_db_for{$acc};

        if ($entry) {
            my ($gi, $db) = @{$entry};
            print "<Hit_id>gi|$gi|$db|$acc|</Hit_id>\n";
            next;
        }

        warn "No GI found for accession '$acc'; leaving Hit_id unchanged\n";
    }

    print $line;
}
close $xml_fh;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment