Skip to content

Instantly share code, notes, and snippets.

@guziy
Last active August 29, 2015 14:14
A script I've used to download GlobSnow snow-extent data.
import urllib2
import re
import os
url = "http://www.globsnow.info/se/archive_v2.1/{}/D4SC/"
start_year = 2003
end_year = 2003
for year in range(start_year, end_year + 1):
year_url = url.format(year)
# get the html of the directory listing
x = urllib2.urlopen(year_url).read()
# Get all words starting with GlobSnow and ending with .nc.gz, ? - means non-greedy
fnames = re.findall(r"GlobSnow.*?\.nc\.gz", x)
print len(fnames)
fnames = set(fnames) # Eliminate duplicates
print len(fnames)
for fname in fnames:
if os.path.isfile(fname): # No need to download the same file several times
continue
with open(fname, "w") as f:
flink = os.path.join(year_url, fname)
print "Downloading {} ....".format(flink)
f.write(urllib2.urlopen(flink).read())
print "Downloaded data for year {}".format(year)
print "All downloads finished successfully"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment