Skip to content

Instantly share code, notes, and snippets.

@arvestad
Created September 29, 2022 15:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arvestad/fbf3a99eb2cc52f26ae048294dcabdee to your computer and use it in GitHub Desktop.
Save arvestad/fbf3a99eb2cc52f26ae048294dcabdee to your computer and use it in GitHub Desktop.
UniProt API downloader code
import argparse
import re
import requests
from requests.adapters import HTTPAdapter, Retry
import sys
# Page size assumed by the progress counter in get_batch.
pagesize = 500

# Captures the URL of a Link header entry marked rel="next".
re_next_link = re.compile(r'<(.+)>; rel="next"')

# One shared session for all requests; HTTPS requests are retried with
# back-off when the server answers with one of the listed 5xx statuses.
session = requests.Session()
retries = Retry(
    total=5,
    backoff_factor=0.25,
    status_forcelist=[500, 502, 503, 504],
)
session.mount("https://", HTTPAdapter(max_retries=retries))
def get_next_link(headers):
    """Return the URL of the next page from response headers, if any.

    Args:
        headers: Mapping of response header names to values.

    Returns:
        The URL captured by ``re_next_link`` from the ``Link`` header, or
        ``None`` when the header is absent or does not match the pattern.
    """
    if "Link" not in headers:
        return None
    found = re_next_link.match(headers["Link"])
    return found.group(1) if found else None
def get_batch(batch_url, timeout=(10, 300)):
    """Yield one response per page, following "next" links until none is left.

    Progress is reported on stderr: a dot per page, and on every 50th page
    a line with the running count. The count is ``batch_no * pagesize``, so
    it is only accurate when each page really holds ``pagesize`` records.

    Args:
        batch_url: URL of the first page to fetch.
        timeout: Forwarded to ``session.get``; a ``(connect, read)`` pair in
            seconds. Pass ``None`` to wait indefinitely, which is what the
            code did before this parameter existed.

    Yields:
        ``(response, total)`` tuples, where ``response`` is the page's
        response object and ``total`` is the value of its
        ``x-total-results`` header, or ``'unknown'`` when the server did
        not send that header.

    Raises:
        requests.HTTPError: From ``raise_for_status()`` when a page comes
            back with an error status.
    """
    batch_no = 1
    while batch_url:
        # Without a timeout a stalled connection would block forever.
        response = session.get(batch_url, timeout=timeout)
        response.raise_for_status()
        # The total is only used for progress output, so a missing header
        # must not abort the download.
        total = response.headers.get("x-total-results", "unknown")
        if batch_no % 50 == 0:
            print(f'{batch_no * pagesize} out of {total} ', file=sys.stderr, flush=True)
        else:
            print('.', file=sys.stderr, end='', flush=True)
        yield response, total
        batch_url = get_next_link(response.headers)
        batch_no += 1
    # Terminate the row of progress dots once the last page is delivered.
    print('', file=sys.stderr)
def main(url, outfile):
    """Download all pages for ``url`` and write their text line by line.

    Args:
        url: UniProt REST API URL, handed to ``get_batch``.
        outfile: Path of the file to write. When falsy (``None`` or an
            empty string) the data is written to standard output instead.

    Raises:
        OSError: If ``outfile`` cannot be opened for writing.
        SystemExit: If any exception occurs while downloading or writing;
            the exit message is the text of that exception.
    """
    # Explicit encoding: the default depends on the platform locale, and
    # text outside the locale's code page would make the write fail.
    oh = open(outfile, 'w', encoding='utf-8') if outfile else sys.stdout
    try:
        for batch, _total in get_batch(url):
            for line in batch.text.splitlines():
                print(line, file=oh)
    except Exception as e:
        print(f'An error occurred when reading data from {url}', file=sys.stderr)
        sys.exit(str(e))
    finally:
        # Close only the file opened here; sys.stdout must stay open.
        if outfile:
            oh.close()
def prepare_args():
    """Build the command-line parser for the downloader.

    Returns:
        An ``argparse.ArgumentParser`` with a positional ``url`` argument
        and an optional ``-o``/``--outputfile`` argument.
    """
    parser = argparse.ArgumentParser(
        description='Download UniProt data in chunks, to comply with REST API policies.',
    )
    parser.add_argument('url', help='UniProt REST API URL')
    parser.add_argument(
        '-o',
        '--outputfile',
        help='Where to put the downloaded file',
    )
    return parser
# Script entry point: parse the command line and run the download.
if __name__ == '__main__':
    args = prepare_args().parse_args()
    main(args.url, args.outputfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment