Skip to content

Instantly share code, notes, and snippets.

@j08lue
Last active December 16, 2015 06:08
Show Gist options
  • Save j08lue/5388938 to your computer and use it in GitHub Desktop.
Save j08lue/5388938 to your computer and use it in GitHub Desktop.
Automatize the download of eSurge data from vannstand.no
"""Automatize the download of eSurge data from vannstand.no

Troubleshooting
===============
If the script stops working, the most likely cause is a change
in the data request form on the website.
The form field names (e.g. 'tid', 'aar', etc.) are the most sensitive part;
check the website source code to verify them.

Note
====
The website is
http://vannstand.no/index.php/nb/vannstandsdata/malt-vannstand
but the data request form is actually in an iframe with src
http://vannstand.no/vannscripts/skjema2Ny.php
"""
import mechanize
import datetime
import copy
import os.path
# We need a dictionary that maps each place's option value in the
# drop-down menu to its name.
# Options for 'sted' in the HTML code:
"""
<option value="1">Andenes</option>
<option value="2">Bergen</option>
<option value="3">Bod&#248;</option>
<option value="4">Hammerfest</option>
<option value="5">Harstad</option>
<option value="6">Heimsj&#248;</option>
<option value="7">Helgeroa</option>
<option value="8">Honningsv&#229;g</option>
<option value="9">Kabelv&#229;g</option>
<option value="10">Kristiansund</option>
<option value="11">M&#229;l&#248;y</option>
<option value="12">Narvik</option>
<option value="13">Ny-&#197;lesund</option>
<option value="14">Oscarsborg</option>
<option value="15">Oslo</option>
<option value="16">R&#248;rvik</option>
<option value="17">Stavanger</option>
<option value="18">Tregde</option>
<option value="19">Troms&#248;</option>
<option value="20">Trondheim</option>
<option value="21">Vard&#248;</option>
<option value="22">Viker</option>
<option value="23">&#197;lesund</option>
"""
# Map the form's 'sted' option value (a string) to an ASCII-only station
# name that is safe to embed in output file names.
# Transliteration used throughout: oe for the o-slash, aa for the a-ring.
# ("ae" is reserved for the ae-ligature, so Honningsvag/Kabelvag with a-ring
# are spelled "...vaag", consistent with Maaloey and Aalesund.)
location_names = {
    '1': 'Andenes',
    '2': 'Bergen',
    '3': 'Bodoe',
    '4': 'Hammerfest',
    '5': 'Harstad',
    '6': 'Heimsjoe',
    '7': 'Helgeroa',
    '8': 'Honningsvaag',
    '9': 'Kabelvaag',
    '10': 'Kristiansund',
    '11': 'Maaloey',
    '12': 'Narvik',
    '13': 'NyAalesund',
    '14': 'Oscarsborg',
    '15': 'Oslo',
    '16': 'Roervik',
    '17': 'Stavanger',
    '18': 'Tregde',
    '19': 'Tromsoe',
    '20': 'Trondheim',
    '21': 'Vardoe',
    '22': 'Viker',
    '23': 'Aalesund',
}
def download_from_vannstand(options={},
                            defaults=dict(
                                sted=['1'],
                                tid=['01'],
                                mnd=['1'],
                                aar=['1992'],
                                antall_dager=['30'],
                                nivaa=['2'],
                                typedata=['1']),
                            starturl='http://vannstand.no/vannscripts/skjema2Ny.php',
                            outDir='.'):
    """Download water level data from vannstand.no

    Fills in the data request form at *starturl*, follows the
    'Last ned fil' link and the link to the data file, and saves the file
    as ``eSurge_<location>_<YYYYMMDD>_<N>days.dat`` in *outDir*.

    Parameters
    ==========
    options : dict
        options to be filled into website form
        (values may be single items or lists)
    defaults : dict
        defaults for options. Are overwritten with items in options
    starturl : str
        url to website
    outDir : str
        output directory (must already exist)

    Returns
    =======
    None

    Raises
    ======
    KeyError
        if the selected 'sted' is not a key of location_names
    ValueError
        if 'aar', 'mnd' and 'tid' do not form a valid date
    """
    # Combine defaults and options into a fresh dict; neither argument is
    # mutated, which matters because both default values are shared objects.
    merged = dict(defaults)
    merged.update(options)
    # Form controls are assigned lists of values, so wrap single items.
    allopts = {key: opt if isinstance(opt, list) else [opt]
               for key, opt in merged.items()}

    # Define the output file name before any network traffic so that an
    # unknown location or an invalid date fails fast.
    loc = location_names[allopts['sted'][0]]
    date = datetime.datetime(year=int(allopts['aar'][0]),
                             month=int(allopts['mnd'][0]),
                             day=int(allopts['tid'][0]))
    datestr = date.strftime('%Y%m%d')
    fname = 'eSurge_{}_{}_{}days.dat'.format(
        loc, datestr, allopts['antall_dager'][0])
    fpath = os.path.join(outDir, fname)

    br = mechanize.Browser()
    try:
        # open the page and select the (first) form
        br.open(starturl)
        br.select_form(nr=0)
        br.form.set_all_readonly(False)

        # set options
        for key, opt in allopts.items():
            br[key] = opt

        # submit request
        print('Requesting data ...')
        br.submit()

        # get download site link
        print('Browsing to download site ...')
        br.follow_link(br.find_link(text='Last ned fil'))

        # get file link
        print('Finding link to file ...')
        # The link text contains a non-ASCII letter (ae-ligature). Depending
        # on whether the text arrives as UTF-8 bytes or as decoded text this
        # is two characters or one, so match either instead of hard-coding
        # one encoding.
        file_link = br.find_link(
            text_regex='Beregnet tidevann, observert vannstand og '
                       'v.{1,2}rets virkning')

        # download file
        print('Downloading data to file {} ...'.format(fpath))
        resp = br.follow_link(file_link)
        # Binary mode: store the response exactly as received.
        with open(fpath, 'wb') as fout:
            fout.write(resp.read())
    finally:
        # Release the browser even if any step above fails.
        br.close()
def run_in_intervals(locations=['23', '7'],
                     firstdate=datetime.datetime(1992, 1, 1),
                     lastdate=datetime.datetime(2012, 12, 31),
                     interval=datetime.timedelta(days=30),
                     **kwargs):
    """Run the data download from *firstdate* to *lastdate* in steps of *interval*

    For every location, one request is made per start date
    firstdate, firstdate + interval, ... as long as the start date is not
    later than *lastdate*. Each request asks for ``interval.days`` days, so
    the final request may extend beyond *lastdate*.

    Parameters
    ==========
    locations : list of str
        water level locations (see top of this script)
        make sure that names are set in location_names
    firstdate : datetime.datetime
        start date of the first request
    lastdate : datetime.datetime
        latest allowed start date of a request
    interval : datetime.timedelta
        step between requests; its ``days`` attribute is sent as the
        number of days to download. Must be positive.
    **kwargs : optional keyword arguments
        passed on to download_from_vannstand()

    Raises
    ======
    ValueError
        if *interval* is zero or negative (the loop would never terminate)
    """
    if interval <= datetime.timedelta(0):
        raise ValueError(
            'interval must be a positive timedelta, got {!r}'.format(interval))

    for loc in locations:
        print('Downloading data for')
        print('... location: {}'.format(loc))
        # per-location options: number of days and location stay fixed,
        # the date fields are updated for every request
        options = {
            'antall_dager': '{}'.format(interval.days),
            'sted': loc,
        }
        # datetime objects are immutable, so no copy is needed
        date = firstdate
        while date <= lastdate:
            print('... date: {}'.format(date))
            # set time options
            options['aar'] = '{}'.format(date.year)
            options['mnd'] = '{}'.format(date.month)
            options['tid'] = '{:02d}'.format(date.day)
            # run download
            download_from_vannstand(options=options, **kwargs)
            date += interval
if __name__ == "__main__":
    # When run as a script: first the interval download with its default
    # locations and date range, then one single download with the default
    # form options. Both write into the current directory.
    for job in (run_in_intervals, download_from_vannstand):
        job(outDir='.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment