Rename a GenBank file to the description of its first sequence
#!/usr/bin/env python
"""Rename a GenBank file after the description of its first sequence record.

Usage: <script> <GENBANK FILE>

The new name is the record description with commas and periods removed and
spaces replaced by underscores, followed by ".gbk".
"""
import os
import sys


def description_to_filename(description):
    """Return the ".gbk" file name derived from a record description.

    Commas and periods are dropped and spaces become underscores. Path
    separators are also replaced by underscores so that the result is always
    a single file name rather than a path into another directory.
    """
    name = description.replace(',', '').replace('.', '').replace(' ', '_')
    for separator in (os.sep, os.altsep):
        if separator:  # os.altsep is None on platforms with a single separator
            name = name.replace(separator, '_')
    return name + ".gbk"


def main(argv=None):
    """Rename the GenBank file named in argv[1] after its first record.

    argv defaults to sys.argv. Exits through sys.exit() with a message when
    Biopython is missing, no file argument is given, the file does not exist,
    or the file contains no GenBank record.
    """
    if argv is None:
        argv = sys.argv

    # Biopython libraries or nothing
    try:
        from Bio import SeqIO
    except ImportError:
        sys.exit("Biopython library not found. See http://biopython.org for installation instructions.")

    # User has to pass the GenBank file as the first command-line argument
    if len(argv) < 2:
        sys.exit('Usage: %s <GENBANK FILE>' % argv[0])

    # File must exist
    genbank_file = argv[1]
    if not os.path.exists(genbank_file):
        sys.exit('ERROR: File %s was not found!' % genbank_file)

    # Read only the first record. The handle is closed before renaming,
    # because renaming a file that is still open fails on some platforms.
    with open(genbank_file, "r") as handle:
        # Builtin next() works on both Python 2 and 3; the None default
        # turns an empty file into a clean error instead of StopIteration.
        first_record = next(SeqIO.parse(handle, "genbank"), None)
    if first_record is None:
        sys.exit('ERROR: No GenBank record found in %s!' % genbank_file)

    # The target is a bare file name, so the renamed file ends up in the
    # current working directory.
    os.rename(genbank_file, description_to_filename(first_record.description))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment