Skip to content

Instantly share code, notes, and snippets.

@bendichter
Created April 27, 2022 21:36
Show Gist options
  • Save bendichter/c2380991b7c403aeec247c259d38e9e5 to your computer and use it in GitHub Desktop.
Save bendichter/c2380991b7c403aeec247c259d38e9e5 to your computer and use it in GitHub Desktop.
Iterate over an entire collection and download each file from each article
import os
import json
import requests
from tqdm import tqdm
from urllib.request import urlretrieve
# Root URL of the figshare v2 REST API; endpoint paths are appended to it.
BASE_URL = 'https://api.figshare.com/v2'
def _fetch_json(url, params=None):
    """Return the decoded JSON body of a GET request, raising on HTTP errors."""
    # A timeout keeps a stalled connection from hanging the whole download.
    response = requests.get(url, params=params, timeout=60)
    # Fail loudly instead of parsing an error payload as if it were data.
    response.raise_for_status()
    return response.json()


def _safe_name(name):
    """Return ``name`` reduced to a single, safe path component."""
    for separator in (os.sep, os.altsep):
        if separator:
            name = name.replace(separator, "_")
    # "", "." and ".." would resolve to the parent or current directory.
    return name if name not in ("", ".", "..") else "_"


def download_collection(collection_id, destination):
    """Download every file of every article in a figshare collection.

    One sub-directory per article (named after the article title) is created
    inside ``destination``. Each article directory receives a
    ``metadata.json`` file holding the article description under
    ``NWBFile.experiment_description`` (written only if it does not already
    exist), followed by the article's data files. A data file is skipped when
    a file of the same name and the size reported by the API is already on
    disk, so an interrupted run can be restarted.

    Args:
        collection_id: Identifier of the figshare collection.
        destination: Directory to download into; created, including missing
            parents, if it does not exist.

    Raises:
        requests.HTTPError: If the figshare API answers with an error status.
    """
    os.makedirs(destination, exist_ok=True)

    # get all articles for collection, following pagination so collections
    # larger than a single page are not silently truncated
    page_size = 1000
    article_records = []
    page = 1
    while True:
        batch = _fetch_json(
            BASE_URL + f'/collections/{collection_id}/articles',
            params={"page": page, "page_size": page_size},
        )
        article_records.extend(batch)
        if len(batch) < page_size:
            break
        page += 1

    for article_record in tqdm(article_records, desc="articles"):
        # if article directory does not exist, create it; the title is
        # sanitised because it may contain path separators
        article_directory = os.path.join(
            destination, _safe_name(article_record["title"])
        )
        os.makedirs(article_directory, exist_ok=True)

        # get all metadata for that article
        article_metadata = _fetch_json(
            BASE_URL + f'/articles/{article_record["id"]}'
        )

        # write metadata file
        metadata_filepath = os.path.join(article_directory, "metadata.json")
        if not os.path.exists(metadata_filepath):
            with open(metadata_filepath, "w") as metadata_file:
                json.dump(
                    dict(NWBFile=dict(experiment_description=article_metadata["description"])),
                    metadata_file,
                )

        # download data files
        file_records = article_metadata["files"]
        for file_record in tqdm(file_records, desc=f"files in article {article_record['title']}"):
            filepath = os.path.join(article_directory, _safe_name(file_record['name']))
            # skip files that were already fully downloaded
            if os.path.exists(filepath) and os.path.getsize(filepath) == file_record["size"]:
                continue
            urlretrieve(file_record['download_url'], filepath)
# Run the download only when executed as a script, so that importing this
# module does not start a network download as a side effect.
if __name__ == "__main__":
    download_collection(5043830, "/Users/bendichter/Downloads/Schiavo2020")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment