Skip to content

Instantly share code, notes, and snippets.

@bdrown
Created April 27, 2020 15:24
Show Gist options
  • Save bdrown/747c6fa0edc3b21f8c9c76288913a6e2 to your computer and use it in GitHub Desktop.
Save bdrown/747c6fa0edc3b21f8c9c76288913a6e2 to your computer and use it in GitHub Desktop.
Searches the current directory for zip files that contain GenBank files, then extracts and renames them.
#!/usr/bin/python
# zip_file_extractor.py
#
# Searches the current directory for zip files that contain GenBank files, then extracts and renames them.
#
# Usage: python zip_file_extractor.py
#
# 4/17/2020 - Initial script (Chloe Smith)
# 4/27/2020 - Implemented file renaming (Bryon Drown)
import os
from zipfile import BadZipFile
from zipfile import ZipFile
# Suffix identifying the archived file of interest inside each zip.
DEFAULT_MEMBER_SUFFIX = 'region001.gbk'


def extract_genbank(zip_path, output_path=None, suffix=DEFAULT_MEMBER_SUFFIX):
    """Copy the archived file whose name ends with *suffix* out of a zip.

    The member's bytes are read straight from the archive and written to
    *output_path*, so no intermediate extraction or rename is needed.

    Args:
        zip_path: Path of the zip archive to search.
        output_path: Destination file. Defaults to *zip_path* with its
            extension replaced by ``.gbk``.
        suffix: Name ending that identifies the member to copy.

    Returns:
        The archive member name that was written, or ``None`` when no member
        matched (in which case no output file is created).

    Raises:
        zipfile.BadZipFile: If *zip_path* is not a readable zip archive.
    """
    if output_path is None:
        output_path = os.path.splitext(zip_path)[0] + '.gbk'

    with ZipFile(zip_path, 'r') as archive:
        matches = [name for name in archive.namelist() if name.endswith(suffix)]
        if not matches:
            return None
        if len(matches) > 1:
            print('Warning: ' + str(len(matches)) + ' files match ' + suffix
                  + ' in ' + zip_path + ', using ' + matches[-1])
        # Keep the historical result: later matches used to overwrite earlier
        # ones, so the last matching member is the one that ends up on disk.
        member = matches[-1]
        # Read before opening the output so a failed read cannot leave a
        # truncated output file behind.
        data = archive.read(member)

    with open(output_path, 'wb') as outfile:
        outfile.write(data)
    return member


def main():
    """Extract the Genbank file from every zip archive in the current directory."""
    print('Extract .gbk file from ZIP')
    # Sorted so archives are processed in a predictable order.
    for filename in sorted(os.listdir('.')):
        # Only regular files can be archives; a directory named *.zip is skipped.
        if not filename.endswith('.zip') or not os.path.isfile(filename):
            continue
        print('Extracting ' + filename)
        output = os.path.splitext(filename)[0] + '.gbk'  # determine name of output file
        try:
            member = extract_genbank(filename, output)
        except BadZipFile as err:
            # One damaged archive should not abort the rest of the batch.
            print('Skipping ' + filename + ': ' + str(err))
            continue
        if member is None:
            print('No Genbank file found in ' + filename)
        else:
            print('Genbank file found, writing to ' + output)
    print('Complete')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment