Created
September 25, 2019 01:05
-
-
Save lbesnard/dda6f5968be6474bffd295184a16d41a to your computer and use it in GitHub Desktop.
Ocean Colour - download data from oceandata.sci.gsfc.nasa.gov
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import urllib
import urllib.error
import urllib.request

from bs4 import BeautifulSoup
from netCDF4 import Dataset
from retry import retry
# Years substituted for {year} in the listing URL. range() excludes its end
# value, so this covers 1999 through 2018 inclusive.
YEARS_TO_DOWNLOAD = list(range(1999, 2019))
# Values substituted for {var} in the listing URL.
VARS_TO_DOWNLOAD = ['par']
# Local directory that downloaded files are written into.
download_path = '/tmp/manu_data'
@retry(urllib.error.URLError, tries=4, delay=3, backoff=2)
def urlopen_with_retry(f):
    """Download the file at URL ``f`` into ``download_path``.

    The local file name is the basename of ``f``. If a file with that name
    already exists and opens as a valid netCDF dataset, nothing is
    downloaded. If it exists but fails validation, it is removed and fetched
    again.

    The ``retry`` decorator re-invokes this function when
    ``urllib.error.URLError`` is raised (configured with ``tries=4``,
    ``delay=3``, ``backoff=2``).

    :param f: URL of the file to download.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(download_path, exist_ok=True)
    output_path = os.path.join(download_path, os.path.basename(f))

    # Overwrite an existing file only if it is corrupted.
    if os.path.exists(output_path):
        if is_netcdf_validity(output_path):
            return
        os.remove(output_path)

    print('downloading {file}'.format(file=f))
    urllib.request.urlretrieve(f, output_path)
def is_netcdf_validity(netcdf_path):
    """Return whether ``netcdf_path`` can be opened read-only as a ``Dataset``.

    :param netcdf_path: path of the file to check.
    :return: ``True`` if ``Dataset(netcdf_path, 'r')`` opens without error,
        ``False`` if opening raises any ``Exception``.
    """
    try:
        with Dataset(netcdf_path, 'r'):
            return True
    except Exception:
        # Deliberately broad: any failure to open means "not valid". Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return False
def list_netcdf_links():
    """Collect netCDF download links from the yearly listing pages.

    For every combination of ``VARS_TO_DOWNLOAD`` and ``YEARS_TO_DOWNLOAD``
    the matching listing page is fetched and each ``<a>`` tag whose ``href``
    matches ``^https://.*\\.nc`` is recorded.

    A page that cannot be fetched is reported on stdout and skipped, so one
    missing year does not abort the whole listing.

    :return: list of ``href`` strings, in page order.
    """
    # Compiled once; a raw string avoids the invalid "\." escape warning.
    nc_href = re.compile(r"^https://.*\.nc")
    netcdf_links = []
    for var in VARS_TO_DOWNLOAD:
        for year in YEARS_TO_DOWNLOAD:
            page_link = (
                "https://oceandata.sci.gsfc.nasa.gov/MODIS-Aqua/Mapped/8-Day/9km/{var}/{year}/"
                .format(year=year, var=var)
            )
            try:
                # The context manager closes the HTTP response once parsed.
                with urllib.request.urlopen(page_link) as html_page:
                    soup = BeautifulSoup(html_page, 'html.parser')
            except OSError as err:
                # URLError and HTTPError both derive from OSError. The
                # original built a ValueError here without raising it, so
                # failures went unnoticed; report them and carry on.
                print('could not list {page}: {err}'.format(page=page_link,
                                                            err=err))
                continue
            netcdf_links.extend(
                anchor.get('href')
                for anchor in soup.find_all('a', attrs={'href': nc_href})
            )
    return netcdf_links
## Main part of the script
# Gather every matching link first, then fetch the files one at a time.
netcdf_links = list_netcdf_links()
for f in netcdf_links:
    urlopen_with_retry(f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment