Skip to content

Instantly share code, notes, and snippets.

@ppearson
Last active May 16, 2023 06:26
Show Gist options
  • Save ppearson/911e258efe651204c88c1a6bec5ba606 to your computer and use it in GitHub Desktop.
Save ppearson/911e258efe651204c88c1a6bec5ba606 to your computer and use it in GitHub Desktop.
get_src_sat_images.py
"""
A Python script to use gdal_translate to download VIIRS visibile satellite imagery
for the entire world (in TIFF tiled format), getting the daily composite of daytime
(noon I think?) for each day requested.
A python re-write (with multiprocess threading ability and tweaks to ignored return HTTP codes)
of the bash script provided here: https://hannes.enjoys.it/blog/2021/01/satellite-composite-of-earth-2020/
"""
import errno
import os
import sys
import argparse
import subprocess
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
baseOutputPath = "/home/$USER/data/visible_earth/src_data/16k/"
outputFilenamePrefix = "colData"
layerName = "VIIRS_SNPP_CorrectedReflectance_TrueColor"
# Note: this needs changing depending on the res wanted...
# Note: basically, see what gdal outputs when it says "Input size is...", and use the tile level which is >
# outsize dimensions requested below.
# Currently as of May 2023, this is sufficient for 32k x 16k input from earthdata.nasa.gov (Input file size is 32768, 16384),
# which gets downsampled via gdal_translate to 16k x 8k, with the files saved having that resolution.
tileLevel = 5
gdal_command = "gdal_translate"
def makedirs(path):
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def build_gdal_wms_xml(layerName, dateStr, tileLevel):
xml = """<GDAL_WMS>
<Service name="TMS">
<ServerUrl>https://gibs.earthdata.nasa.gov/wmts/epsg4326/best/%s/default/%s/250m/${z}/${y}/${x}.jpg</ServerUrl>
</Service>
<DataWindow>
<UpperLeftX>-180.0</UpperLeftX>
<UpperLeftY>90</UpperLeftY>
<LowerRightX>396.0</LowerRightX>
<LowerRightY>-198</LowerRightY>
<TileLevel>%d</TileLevel>
<TileCountX>2</TileCountX>
<TileCountY>1</TileCountY>
<YOrigin>top</YOrigin>
</DataWindow>
<Projection>EPSG:4326</Projection>
<BlockSizeX>512</BlockSizeX>
<BlockSizeY>512</BlockSizeY>
<BandsCount>3</BandsCount>
<ZeroBlockOnServerException>true</ZeroBlockOnServerException>
<ZeroBlockHttpCodes>204,404,400</ZeroBlockHttpCodes>
</GDAL_WMS>""" % (layerName, dateStr, tileLevel)
xml = xml.replace("\n", "")
return '{}'.format(xml)
def build_gdal_command(dateStr, outputPath):
items = [gdal_command]
items = items + ["-outsize", "16384", "8192"]
items = items + ["-projwin", "-180", "90", "180", "-90"]
items = items + ["-of", "GTIFF", "-r", "cubic", "-co", "TILED=YES", "-co", "COMPRESS=DEFLATE", "-co", "PREDICTOR=2", "-co", "NUM_THREADS=ALL_CPUS"]
items.append(build_gdal_wms_xml(layerName, dateStr, tileLevel))
items.append(outputPath)
return items;
def run_command(command_and_args):
# TODO: this doesn't print errors/warnings until the process exits which is a bit annoying,
# and you don't see any output if you Ctrl+C the process early...
process = subprocess.Popen(command_and_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False)
# print(" ".join(command_and_args))
output, error = process.communicate()
if error:
print(f"Error for '{command_and_args}':")
print(error.decode())
# download a map (might be run in a thread)
def process_item(item):
commands = item
run_command(commands)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
prog='get_src_sat_images',
description='Gets Src satellite images',)
parser.add_argument("startDate", help="The start date in format: 'YYYY-MM-DD'")
parser.add_argument('--days', type=int, default=1, help='number of days after to fetch for.')
parser.add_argument('--threads', type=int, default=1, help='number of threads to use in parallel.')
parser.add_argument('--fillmissingonly', type=bool, default=False, help='Only run for days where filename is missing, to effectively fill in the gaps.')
args = parser.parse_args()
if not args.days:
print("Invalid 'days' arg.")
exit(0)
startDateTime = datetime.strptime(args.startDate, '%Y-%m-%d')
lastYear = None
lastMonth = None
fullYearPath = None
taskItems = []
for i in range(0, args.days):
targetDate = startDateTime + timedelta(days=i)
yearString = targetDate.strftime("%Y")
if yearString != lastYear:
fullYearPath = os.path.join(baseOutputPath, yearString)
makedirs(fullYearPath)
lastYear = yearString
monthString = targetDate.strftime("%m")
if monthString != lastMonth:
fullYearMonthPath = os.path.join(fullYearPath, monthString)
makedirs(fullYearMonthPath)
lastMonth = monthString
dateStr = targetDate.strftime("%Y-%m-%d")
targetFilename = "{}_{}.tif".format(outputFilenamePrefix, dateStr)
targetOutputPath = os.path.join(fullYearMonthPath, targetFilename)
if args.fillmissingonly:
if os.path.exists(targetOutputPath):
# it exists already, so ignore it
continue
command_items = build_gdal_command(dateStr, targetOutputPath)
taskItems.append((command_items))
if len(taskItems) == 0:
print("No items to run.")
exit(0)
if len(taskItems) == 1:
print("Processing 1 day...")
else:
print("Processing: {} days...".format(len(taskItems)))
# now process them, in parallel if required...
if args.threads <= 1:
for task in taskItems:
process_item(task)
else:
with ThreadPoolExecutor(max_workers=args.threads) as executor:
futures = [executor.submit(process_item, item) for item in taskItems]
results = [future.result() for future in futures]
print("Finished.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment