Skip to content

Instantly share code, notes, and snippets.

@chasemc
Created August 7, 2022 14:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chasemc/0b283e7debf30f88a04b6ff01420f3ca to your computer and use it in GitHub Desktop.
Save chasemc/0b283e7debf30f88a04b6ff01420f3ca to your computer and use it in GitHub Desktop.
Download an NCBI GenBank file for a subrange of a nucleotide sequence
import requests
from pathlib import Path
def _gbk_filename_part(record_text, key, space_sub):
    """Extract a filename-safe value from the first record line starting with *key*.

    Returns "NA" when no line matches or the matched value is empty.
    """
    for line in record_text.splitlines():
        stripped = line.strip()
        if stripped.startswith(key):
            value = stripped.removeprefix(key).strip()
            # A "/" would be interpreted as a directory separator in the
            # output path, so neutralise it along with the spaces.
            value = value.replace(" ", space_sub).replace("/", "_")
            return value or "NA"
    return "NA"


def dl_subsequence_gbk(outdir, accession, n_start, n_stop, timeout=60):
    """Download part of an NCBI nuccore record as a GenBank file.

    The record is fetched from the NCBI E-utilities ``efetch`` endpoint and
    written to ``<outdir>/<assembly>_<accession>_<organism>.gbk``, where
    ``assembly`` and ``organism`` are parsed from the returned text and fall
    back to ``"NA"`` when they cannot be found.

    Args:
        outdir: Directory to write the file into; created if it is missing.
        accession: Nucleotide accession, sent as the ``id`` of the request.
        n_start: Sent as efetch's ``from`` parameter.
        n_stop: Sent as efetch's ``to`` parameter.
        timeout: Seconds passed to ``requests.post`` so that a stalled
            connection raises instead of hanging indefinitely.

    Returns:
        None. On a non-OK HTTP response a message is printed and no file is
        written.
    """
    response = requests.post(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
        params={
            "db": "nuccore",
            "rettype": "gb",
            "from": str(n_start),
            "to": str(n_stop),
        },
        # NOTE(review): the id is sent as a multipart "file" field rather than
        # as ordinary form data; kept as-is because the endpoint is reported
        # to accept it -- confirm before switching to `data=`.
        files={"id": accession},
        timeout=timeout,
    )
    if not response.ok:
        print(f"Failed with response: {response.reason}")
        return

    record_text = response.text
    organism = _gbk_filename_part(record_text, "ORGANISM", "-")
    # Match on "Assembly:" (with the colon) so that other lines that merely
    # begin with the word "Assembly" are not mistaken for the assembly id.
    assembly = _gbk_filename_part(record_text, "Assembly:", "_")

    target_dir = Path(outdir)
    target_dir.mkdir(parents=True, exist_ok=True)
    filepath = target_dir / f"{assembly}_{accession}_{organism}.gbk"
    with open(filepath, "w", encoding="utf-8") as gbk_file:
        gbk_file.write(record_text)
@chasemc
Copy link
Author

chasemc commented Aug 7, 2022

# Example call: requests positions 1 through 10000 of accession
# NZ_LFOD01000025.1 and writes the result under /home/chase/Downloads/test.
dl_subsequence_gbk(
    outdir="/home/chase/Downloads/test",
    accession="NZ_LFOD01000025.1",
    n_start=1,
    n_stop=10000,
)

Running this call will produce the file:

/home/chase/Downloads/test/GCF_001077745.1_NZ_LFOD01000025.1_Mycolicibacterium-conceptionense.gbk

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment