Skip to content

Instantly share code, notes, and snippets.

@yvesf
Created June 7, 2015 12:32
Show Gist options
  • Save yvesf/50ae9fe28f319b1c4d16 to your computer and use it in GitHub Desktop.
Save yvesf/50ae9fe28f319b1c4d16 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import json
import time
import netrc
import base64
import codecs
import urllib.request
import urllib.parse
from funcparserlib.lexer import make_tokenizer, Token, LexerError
from funcparserlib.parser import (some, a, maybe, many, finished, skip)
""" Make an entry in .netrc like:
> machine ftp-outgoing2.dwd.de
> login gds29XXX
> password ASDFqwerty """
# URL template for one forecast table on the DWD FTP server.  The three
# placeholders are filled positionally via str.format().
BASE_URL = (
    "ftp://{}:{}@ftp-outgoing2.dwd.de/"
    "gds/specials/forecasts/tables/germany/{}"
)
def tokenize(str):
    """Split raw forecast text into the tokens the parser consumes.

    str -> Sequence(Token)

    The specs are handed to ``make_tokenizer`` in list order; funcparserlib
    tries them in that order and uses the first regex that matches at the
    current position, so the order below is significant.  Tokens whose type
    is listed in ``useless`` are dropped from the returned list.

    NOTE: the parameter name shadows the builtin ``str``; it is kept so that
    existing keyword callers keep working.
    """
    specs = [
        ('Space', (r'[ \t\r\n,]+',)),
        # Space already consumes \r and \n, so this spec is only a fallback.
        ('NL', (r'[\r\n]+',)),
        # Date must be tried before Number, otherwise "08.06.2015" would be
        # split into a number and a remainder.
        ('Date', (r'[0-9]{2}\.[0-9]{2}\.[0-9]{4}',)),
        ('Header', (r'^Vorhersage .*für',)),
        ('Footer', (r'Temperatur .*',)),
        # [A-Za-z] instead of [A-z]: the latter also matches [ \ ] ^ _ `.
        ('DateText', (r'[A-Za-z][a-z]+ (früh|spät)',)),
        # The negative lookbehind restricts Place to words that are not
        # directly preceded by a space; such words fall through to Text.
        ('Place', (r'(?<! )[A-Za-z\200-\377_][A-Za-z\200-\377_0-9-\.]*',)),
        ('Text',
         (r'[A-Za-z\200-\377_][A-Za-z\200-\377_0-9-\.]*( ?[A-Za-z\200-\377_0-9-\.]+)*',)),
        # The alternation is grouped so that the optional sign applies to
        # both number forms, not just to the ".5" form.
        ('Number', (r'-?((\.[0-9]+)|([0-9]+(\.[0-9]*)?))',)),
        # Separator is tried after Number so that the minus sign of a
        # negative number is not swallowed as a (discarded) separator.
        # None of the other specs can start with '-', so its position
        # relative to them does not matter.
        ('Separator', (r'\-+',)),
    ]
    useless = {'Separator', 'NL', 'Space', 'Header', 'Footer'}
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
def parse(tokens):
    """Turn the token sequence of one forecast table into a dict.

    Expected token layout: a ``DateText`` token followed by a ``Date``
    token, then any number of place entries, each made of a ``Place``
    token, a ``Number`` token and zero or more ``Text`` tokens.  Nothing
    may follow the last place entry.

    Returns:
        ``{'date': <timestamp string>, 'places': [<place dict>, ...]}``
        where every place dict has the keys ``name``, ``degree`` and
        ``description`` (a list of the Text token values).

    Raises:
        Exception: if the DateText value contains neither "früh" nor
            "spät".
        Errors raised by funcparserlib when the tokens do not match the
        layout above are propagated unchanged.
    """

    def make_date(args):
        """Combine the DateText and Date tokens into a UTC timestamp string."""
        (dateText, date) = args
        day = time.strptime(date.value, '%d.%m.%Y')
        if "früh" in dateText.value:
            hour = 9  # 09:00 local time
        elif "spät" in dateText.value:
            hour = 18  # 18:00 local time
        else:
            raise Exception("Invalid dateText: {}".format(dateText.value))
        # Set the hour in the broken-down local time *before* mktime()
        # instead of adding seconds to local midnight: on days with a DST
        # change, midnight + 9 * 3600 s is not 09:00 wall-clock time.
        # tm_isdst=-1 lets mktime() work out whether DST applies.
        stamp = time.mktime((day.tm_year, day.tm_mon, day.tm_mday,
                             hour, 0, 0, day.tm_wday, day.tm_yday, -1))
        # %F and %T are glibc extensions; spell them out for portability.
        # gmtime() yields UTC, so the offset is written literally rather
        # than relying on the platform-dependent %z.
        return time.strftime("%Y-%m-%dT%H:%M:%S+0000", time.gmtime(stamp))

    def make_place(args):
        """Build the dict for one place entry."""
        (place, degree, desc) = args
        return {'name': place.value, 'degree': degree.value,
                'description': [t.value for t in desc]}

    def make_result(args):
        """Assemble the final result from the date and the place list."""
        (date, places) = args
        return {'date': date, 'places': places}

    def token(kind):
        """Parser that accepts a single token whose type equals *kind*."""
        return some(lambda t: t.type == kind)

    # '+' binds tighter than '>>', so each conversion function receives the
    # whole sequence parsed on its left.
    date = token('DateText') + token('Date') >> make_date
    place = token('Place') + token('Number') + \
        many(token('Text')) >> make_place
    toplevel = date + many(place) + skip(finished) >> make_result
    return toplevel.parse(tokens)
def read_gds_forecast(name):
    """Download the forecast table *name* from the DWD FTP server and parse it.

    The credentials for ``ftp-outgoing2.dwd.de`` are looked up in the
    user's netrc file.

    Args:
        name: file name that is substituted into ``BASE_URL``.

    Returns:
        The result of ``parse(tokenize(text))`` for the downloaded text,
        which is decoded as latin-1.

    Raises:
        LookupError: if the netrc file has no entry for the host.
    """
    host = "ftp-outgoing2.dwd.de"
    auth = netrc.netrc().authenticators(host)
    if auth is None:
        # Fail with a clear message instead of a TypeError from unpacking.
        raise LookupError("No .netrc entry found for {}".format(host))
    login, _, password = auth
    # Percent-encode the credentials so that characters such as '@', ':'
    # or '/' in them cannot corrupt the URL; urllib's FTP handler decodes
    # the user and password parts again before logging in.
    url = BASE_URL.format(urllib.parse.quote(login or '', safe=''),
                          urllib.parse.quote(password or '', safe=''),
                          name)
    request = urllib.request.Request(url)
    with urllib.request.urlopen(request) as f:
        text = f.read().decode('latin-1')
    return parse(tokenize(text))
def job(name):
    """Fetch and parse the forecast table called *name*.

    Thin wrapper: hands *name* to ``read_gds_forecast`` and passes its
    result back unchanged.
    """
    forecast = read_gds_forecast(name)
    return forecast
if __name__ == "__main__":
    from pprint import pprint

    # Every combination of forecast day and time of day, day-major order.
    forecast_names = [
        "Daten_Suedwest_{}_{}".format(day, daytime)
        for day in ("morgen", "uebermorgen", "Tag4")
        for daytime in ("frueh", "spaet")
    ]
    # Fetch, parse and pretty-print the tables one after another.
    for forecast_name in forecast_names:
        pprint(job(forecast_name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment