Skip to content

Instantly share code, notes, and snippets.

@cthoyt
Created April 16, 2024 09:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cthoyt/51d4e0c2f411933d337cc6aae5ec785c to your computer and use it in GitHub Desktop.
Save cthoyt/51d4e0c2f411933d337cc6aae5ec785c to your computer and use it in GitHub Desktop.
Get Bioregistry data from the first release of each month
import requests
from dateutil.parser import parse
# Fetch the release index of the ``bioregistry`` package from the PyPI JSON API.
pypi_response = requests.get(
    "https://pypi.org/pypi/bioregistry/json",
    headers={"Accept": "application/json"},
    timeout=30,  # do not hang forever if PyPI is unreachable
)
# Fail early on an HTTP error instead of on a confusing KeyError further down.
pypi_response.raise_for_status()
res_json = pypi_response.json()

# Map the upload time of each release's first file to its version string.
# A release whose file list is empty has no upload time and is skipped;
# indexing ``files[0]`` on it would raise IndexError.
releases = {
    parse(files[0]["upload_time"]): version
    for version, files in res_json["releases"].items()
    if files
}
# Rebuild the dict in chronological order (dicts preserve insertion order).
releases = dict(sorted(releases.items()))
from collections import defaultdict
# Group the releases by the (year, month) in which they were uploaded.
dd = defaultdict(list)
# ``releases`` maps upload datetime -> version string, so the key is the date.
# Unpacking in the opposite order binds ``date`` to a str and makes
# ``date.year`` raise AttributeError.
for date, version in releases.items():
    dd[date.year, date.month].append((date, version))
from functools import lru_cache
@lru_cache(maxsize=None)
def get_data(version):
    """Download and decode ``bioregistry.json`` from the git ref ``v{version}``.

    The file is requested from raw.githubusercontent.com. Results are
    memoized per ``version`` with an unbounded cache, so repeated calls with
    the same argument do not trigger another download.

    :param version: Version string; it is prefixed with ``v`` to form the ref.
    :returns: The decoded JSON body of the response.
    :raises requests.HTTPError: If the response has an error status code.
    """
    response = requests.get(
        f"https://raw.githubusercontent.com/biopragmatics/bioregistry/v{version}/src/bioregistry/data/bioregistry.json"
    )
    response.raise_for_status()
    return response.json()
from tqdm.auto import tqdm
import time
# Collect one row per month: (upload date, version, data URL, registry data).
rows = []
for parts in tqdm(dd.values()):
    # The smallest (date, version) pair is the earliest release of that month.
    date, version = min(parts)
    if date.year <= 2022:
        continue  # skip the dark days
    time.sleep(1)  # wait one second before each download
    data = get_data(version)
    # Rebuild the URL that get_data() downloads from: its ``url`` is a local
    # variable of that function and is not defined at this scope, so using it
    # here directly would raise NameError.
    url = f"https://raw.githubusercontent.com/biopragmatics/bioregistry/v{version}/src/bioregistry/data/bioregistry.json"
    rows.append((date, version, url, data))
# then, profit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment