Last active
February 11, 2018 15:42
-
-
Save ShaiberAlon/2d300ed12154facbe32be77769338577 to your computer and use it in GitHub Desktop.
summarize_blast_results_with_query_contig.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Click 'Download > Multiple-file JSON' on the NCBI search results page,
# unzip the archive, and run this script inside the unzipped directory
# without any parameters to get a Markdown-formatted table.
import glob
import json
import sys
# poor man's whatever:
#
# Zero-argument accessors into the BLAST report currently being processed.
# They deliberately read two module-level globals that must be assigned
# before any of them is called:
#   hits  -- the parsed JSON document of one BLAST result file
#   index -- position of the hit of interest in the search's hit list


def _search():
    """Return the 'search' section of the current BLAST report."""
    return hits['BlastOutput2']['report']['results']['search']


def QUERY():
    """Return the part of the query title that precedes the first '___'."""
    return _search()['query_title'].split('___')[0]


def QLEN():
    """Return the 'query_len' value of the current search."""
    return _search()['query_len']


def HIT():
    """Return the hit at position `index` in the search's hit list."""
    return _search()['hits'][index]


def DESC():
    """Return the first description record of the current hit.

    `index` selects a hit, not a description, so the description list is
    always read at position 0. Re-using `index` here picked an unrelated
    description (or raised IndexError) as soon as `index` was above 0.
    """
    return HIT()['description'][0]


def TITLE():
    """Return the title of the current hit's description."""
    return DESC()['title']


def SCINAME():
    """Return the scientific name wrapped in underscores (Markdown italics)."""
    return '_%s_' % DESC()['sciname']


def ACC():
    """Return the accession as a Markdown link to its NCBI protein page."""
    return '[%(desc)s](https://www.ncbi.nlm.nih.gov/protein/%(desc)s)' % {'desc': DESC()['accession']}


def HSPS():
    """Return the first HSP of the current hit."""
    return HIT()['hsps'][0]


def PCTALIGN():
    """Return the alignment percentage as a string such as '97.50%'.

    Computed as 100 - (align_len * 100 / query_len - 100).
    NOTE(review): this exceeds 100% whenever the alignment is shorter than
    the query -- confirm that this is the intended metric.
    """
    return '%.2f%%' % (100 - ((HSPS()['align_len'] * 100 / QLEN()) - 100))


def PCTID():
    """Return identities over alignment length as a string such as '90.00%'."""
    return '%.2f%%' % (HSPS()['identity'] * 100 / HSPS()['align_len'])


def CONTIG():
    """Return the contig name embedded in the query title.

    Takes the fourth '|'-separated field of the title and returns what
    follows its first ':'.
    """
    return _search()['query_title'].split('|')[3].split(':')[1]
# Markdown table: header row, then the column-alignment row.
print('|'.join(['', 'query contig', 'Found in the assembly', 'Best hit on NCBI', 'Percent alignment', 'Percent identity', 'Accession', '']))
print('|'.join(['', ':--', ':--', ':--', ':--:', ':--:', ':--:', '']))

# go through every json file in the directory:
for j in glob.glob('*.json'):
    # the context manager closes each file as soon as it has been parsed
    with open(j) as handle:
        hits = json.load(handle)

    # skip the poop file
    if 'BlastOutput2' not in hits:
        continue

    num_hits = len(hits['BlastOutput2']['report']['results']['search']['hits'])

    # report the best hit, unless it resolves to a multispecies entry .. if
    # it does, move on to the next hit. The bound on `index` keeps the scan
    # from running past the end of the hit list.
    index = 0
    while index < num_hits and 'MULTISPECIES' in TITLE():
        index += 1

    if index == num_hits:
        # No hits at all, or nothing but MULTISPECIES hits: there is no row
        # to report. Diagnostics go to stderr so stdout stays a clean table.
        sys.stderr.write('%s: no non-MULTISPECIES hit found, skipping\n' % j)
        continue

    print('|'.join(['', CONTIG(), QUERY(), SCINAME(), PCTALIGN(), PCTID(), ACC(), '']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment