from __future__ import print_function

import json
import re
import sys
import urllib
import urllib2
# Wiktionary genus codes paired with the matching German definite article,
# listed in the order in which the articles are printed.
GENUS_ARTICLES = (('m', 'der'), ('f', 'die'), ('n', 'das'))

# Section heading of a German noun entry in the wikitext, for example
# "=== {{Wortart|Substantiv|Deutsch}}, {{m}}, {{n}} ===".  The genus codes
# may be packed into one template ("{{mn}}") or spread over several.
NOUN_HEADING = re.compile(
    r'===\s?{{Wortart\|Substantiv\|Deutsch}},\s?'
    r'{{([mfn])([mfn])?([mfn])?}}'
    r'(,\s?{{([mfn])}}(,\s?{{([mfn])}})?)*'
    r'\s?==='
)


def fetch_page(substantiv):
    """Return the raw JSON text of the German Wiktionary page query.

    The title is percent-encoded so that umlauts, spaces and characters
    such as ``&`` cannot corrupt the query string.

    Raises:
        urllib2.URLError: if the request fails.
    """
    url = (
        'http://de.wiktionary.org/w/api.php?'
        'format=json'
        '&action=query'
        '&prop=revisions'
        '&rvprop=content'
        '&titles=' + urllib.quote(substantiv, safe='')
    )
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Always release the connection, even when reading fails.
        response.close()


def extract_wikitext(content):
    """Return the wikitext of the first page in a decoded API response.

    Returns ``None`` when the response contains no page or the page has no
    revisions (which is how the API reports a missing title).
    """
    pages = list(content.get('query', {}).get('pages', {}).values())
    if not pages or not pages[0].get('revisions'):
        return None
    return pages[0]['revisions'][0]['*']


def find_genera(wikitext):
    """Return the genus codes ('m', 'f', 'n') from a German noun heading.

    Returns ``None`` when *wikitext* contains no German noun heading.
    """
    match = NOUN_HEADING.search(wikitext)
    if match is None:
        return None
    # The match also captures the surrounding ", {{x}}" fragments and
    # ``None`` for unused optional groups; keep only the bare genus codes.
    return [group for group in match.groups() if group in ('m', 'f', 'n')]


def articles_for(substantiv, genera):
    """Return "<article> <noun>" strings for every genus in *genera*.

    The result is ordered der/die/das and contains each article at most
    once, even if a genus code is listed repeatedly.
    """
    return [
        artikel + ' ' + substantiv
        for genus, artikel in GENUS_ARTICLES
        if genus in genera
    ]


def main(argv):
    """Look up the noun given in *argv* and print it with its article(s).

    Exits with an explanatory message when no noun was given, Wiktionary
    cannot be reached, the word has no entry, or the entry is not a noun.
    """
    if len(argv) < 2:
        sys.exit('Abfrage mit: %s Substantiv' % argv[0])
    substantiv = argv[1]

    try:
        content = json.loads(fetch_page(substantiv))
    except (urllib2.URLError, ValueError):
        # ValueError covers a response body that is not valid JSON.
        sys.exit('Problem beim Zugriff auf Wiktionary.')

    wikitext = extract_wikitext(content)
    if wikitext is None:
        sys.exit(
            'Substantiv nicht im deutschsprachigen Wiktionary verzeichnet.'
        )

    genera = find_genera(wikitext)
    if genera is None:
        sys.exit('Kein Substantiv!')

    for line in articles_for(substantiv, genera):
        print(line)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
johl commented May 4, 2013
$ python migrationsschatten.py Knoblauch
der Knoblauch
$ python migrationsschatten.py Ketchup
der Ketchup
das Ketchup
$ python migrationsschatten.py Joghurt
der Joghurt
das Joghurt
die Joghurt
$ python migrationsschatten.py finden
Kein Substantiv!
$ python migrationsschatten.py djshdjhsdowhdowde
Substantiv nicht im deutschsprachigen Wiktionary verzeichnet.