Skip to content

Instantly share code, notes, and snippets.

@guziy
Last active August 29, 2015 14:14
A script I've used to download GlobSnow snow-extent data.
import urllib2
import re
import os
url = "http://www.globsnow.info/se/archive_v2.1/{}/D4SC/"
start_year = 2003
end_year = 2003
for year in range(start_year, end_year + 1):
year_url = url.format(year)
# get the html of the directory listing
x = urllib2.urlopen(year_url).read()
# Get all words starting with GlobSnow and ending with .nc.gz, ? - means non-greedy
fnames = re.findall(r"GlobSnow.*?\.nc\.gz", x)
print len(fnames)
fnames = set(fnames) # Eliminate duplicates
print len(fnames)
for fname in fnames:
if os.path.isfile(fname): # No need to download the same file several times
continue
with open(fname, "w") as f:
flink = os.path.join(year_url, fname)
print "Downloading {} ....".format(flink)
f.write(urllib2.urlopen(flink).read())
print "Downloaded data for year {}".format(year)
print "All downloads finished successfully"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment