import urllib2
import re
import os

url = "http://www.globsnow.info/se/archive_v2.1/{}/D4SC/"

start_year = 2003
end_year = 2003

for year in range(start_year, end_year + 1):
    year_url = url.format(year)

    # Get the HTML of the directory listing
    x = urllib2.urlopen(year_url).read()

    # Match every name that starts with GlobSnow and ends with .nc.gz;
    # the ? makes the match non-greedy
    fnames = re.findall(r"GlobSnow.*?\.nc\.gz", x)
    print len(fnames)

    fnames = set(fnames)  # Eliminate duplicates
    print len(fnames)

    for fname in fnames:
        if os.path.isfile(fname):  # No need to download the same file twice
            continue

        # year_url already ends with "/"; os.path.join is meant for
        # filesystem paths (it would insert "\\" on Windows), so build
        # the download link by plain concatenation
        flink = year_url + fname
        print "Downloading {} ....".format(flink)

        # Open in binary mode: the .nc.gz archives are binary data,
        # and text mode ("w") would corrupt them on Windows
        with open(fname, "wb") as f:
            f.write(urllib2.urlopen(flink).read())

    print "Downloaded data for year {}".format(year)

print "All downloads finished successfully"
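
The script above targets Python 2 (urllib2 and print statements no longer exist in Python 3). As a minimal sketch of the same download loop on Python 3, assuming the GlobSnow server still serves a plain HTML directory listing (untested):

import os
import re
from urllib.request import urlopen

url = "http://www.globsnow.info/se/archive_v2.1/{}/D4SC/"

for year in range(2003, 2004):
    year_url = url.format(year)

    # urlopen returns bytes on Python 3; decode before applying the regex
    html = urlopen(year_url).read().decode("utf-8", errors="replace")

    for fname in set(re.findall(r"GlobSnow.*?\.nc\.gz", html)):
        if os.path.isfile(fname):  # Skip files already downloaded
            continue
        flink = year_url + fname
        print("Downloading {} ....".format(flink))
        with open(fname, "wb") as f:  # Binary mode for gzipped archives
            f.write(urlopen(flink).read())

    print("Downloaded data for year {}".format(year))

print("All downloads finished successfully")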