Skip to content

Instantly share code, notes, and snippets.

@kwilcox
Created May 27, 2014 13:42
Show Gist options
  • Save kwilcox/9fbcd546cdd8a55856d5 to your computer and use it in GitHub Desktop.
Save kwilcox/9fbcd546cdd8a55856d5 to your computer and use it in GitHub Desktop.
Below is a Python script that can be used to harvest THREDDS catalogs and save the ISO metadata files to a local directory.
import os
import urllib
from thredds_crawler.crawl import Crawl
import logging
import logging.handlers
# Configure the 'thredds_crawler' logger so that records at DEBUG and above
# go to both a size-rotated log file and a stream handler, sharing one format.
logger = logging.getLogger('thredds_crawler')
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Rotating file handler: rolls over at 10 MiB, keeping five backups.
fh = logging.handlers.RotatingFileHandler(
    '/var/log/iso_harvest/iso_harvest.log',
    maxBytes=1024 * 1024 * 10,
    backupCount=5
)
ch = logging.StreamHandler()

# Both handlers get the same level and formatter before being attached.
for handler in (fh, ch):
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
# Root directory under which one sub-folder per catalog is written.
SAVE_DIR = "/srv/http/iso"

# Maps the output sub-folder name to the THREDDS catalog URL to crawl.
THREDDS_SERVERS = {
    "aoos": "http://thredds.axiomalaska.com/thredds/catalogs/aoos.html",
    "cencoos": "http://thredds.axiomalaska.com/thredds/catalogs/cencoos.html",
}
# urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2;
# resolve it once here so the download call works on either interpreter.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve


def _iso_filename(iso_url):
    """Return the local file name used for an ISO record URL.

    Takes the last "/"-separated segment of the URL, keeps the part before
    its first ".", and appends ".iso.xml".
    """
    return iso_url.split("/")[-1].split(".")[0] + ".iso.xml"


# Crawl each configured catalog and save every ISO service document found
# into SAVE_DIR/<subfolder>/.
for subfolder, thredds_url in THREDDS_SERVERS.items():
    logger.info("Crawling %s (%s)", subfolder, thredds_url)
    crawler = Crawl(thredds_url, debug=True)

    # Collect the URL of every service whose "service" value is "iso"
    # (case-insensitive). A missing "service" value is treated as non-matching
    # instead of raising AttributeError and aborting the whole run.
    isos = [
        s.get("url")
        for d in crawler.datasets
        for s in d.services
        if (s.get("service") or "").lower() == "iso"
    ]

    # urlretrieve does not create parent directories, so make sure the
    # per-catalog folder exists before downloading into it.
    target_dir = os.path.join(SAVE_DIR, subfolder)
    try:
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
    except OSError:
        logger.exception("Could not create directory %s", target_dir)
        continue

    for iso in isos:
        try:
            filepath = os.path.join(target_dir, _iso_filename(iso))
            logger.info("Downloading/Saving %s", filepath)
            urlretrieve(iso, filepath)
        except Exception:
            # Best effort: log the failure and move on to the next record.
            # Exception (not BaseException) is caught so that Ctrl-C and
            # SystemExit still stop the harvest.
            logger.exception("Error!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment