scottstanie/sentinel1_s3_urls.py

## sentinel1_s3_urls.py
import requests
from concurrent.futures import ThreadPoolExecutor


def get_s3_direct_urls(
    safe_names: list[str], max_workers: int = 5
) -> str | None:
    """Get the S3 urls for a list of SAFE granules."""

    def _get_url(safe_name):
        item_url = "https://cmr.earthdata.nasa.gov/stac/ASF/collections/SENTINEL-1{sat}_SLC.v1/items/{safe_name}-SLC"
        # example:
        # https://cmr.earthdata.nasa.gov/stac/ASF/collections/SENTINEL-1A_SLC.v1/items/S1A_IW_SLC__1SDV_20150302T000329_20150302T000356_004845_006086_51B0-SLC
        sat = "A" if safe_name.startswith("S1A") else "B"
        resp = requests.get(item_url.format(safe_name=safe_name, sat=sat))
        resp.raise_for_status()
        js = resp.json()

        # Get the "Concept" url which has the S3 bucket link
        # example: "https://cmr.earthdata.nasa.gov/search/concepts/G1345380784-ASF.json"
        concept_url = [
            link["href"]
            for link in js["links"]
            if link["href"].endswith("-ASF.json")
        ][-1]

        # Now using the concept url, get the S3 bucket in one of the links.
        # It will be the one that starts with "s3://"
        resp = requests.get(concept_url)
        resp.raise_for_status()
        js = resp.json()
        s3_url = [
            link["href"]
            for link in js["links"]
            if link["href"].startswith("s3://")
        ][0]
        return s3_url

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        urls = list(executor.map(_get_url, safe_names))
    return urls
	import requests
	from concurrent.futures import ThreadPoolExecutor


	def get_s3_direct_urls(
	safe_names: list[str], max_workers: int = 5
	) -> str \| None:
	"""Get the S3 urls for a list of SAFE granules."""

	def _get_url(safe_name):
	item_url = "https://cmr.earthdata.nasa.gov/stac/ASF/collections/SENTINEL-1{sat}_SLC.v1/items/{safe_name}-SLC"
	# example:
	# https://cmr.earthdata.nasa.gov/stac/ASF/collections/SENTINEL-1A_SLC.v1/items/S1A_IW_SLC__1SDV_20150302T000329_20150302T000356_004845_006086_51B0-SLC
	sat = "A" if safe_name.startswith("S1A") else "B"
	resp = requests.get(item_url.format(safe_name=safe_name, sat=sat))
	resp.raise_for_status()
	js = resp.json()

	# Get the "Concept" url which has the S3 bucket link
	# example: "https://cmr.earthdata.nasa.gov/search/concepts/G1345380784-ASF.json"
	concept_url = [
	link["href"]
	for link in js["links"]
	if link["href"].endswith("-ASF.json")
	][-1]

	# Now using the concept url, get the S3 bucket in one of the links.
	# It will be the one that starts with "s3://"
	resp = requests.get(concept_url)
	resp.raise_for_status()
	js = resp.json()
	s3_url = [
	link["href"]
	for link in js["links"]
	if link["href"].startswith("s3://")
	][0]
	return s3_url

	with ThreadPoolExecutor(max_workers=max_workers) as executor:
	urls = list(executor.map(_get_url, safe_names))
	return urls