Skip to content

Instantly share code, notes, and snippets.

@davosian
Forked from ahue/groceries_wiktionary
Created July 19, 2021 14:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davosian/cdda3f88b3307014e32b46d61236133d to your computer and use it in GitHub Desktop.
Save davosian/cdda3f88b3307014e32b46d61236133d to your computer and use it in GitHub Desktop.
Rhasspy slot program that fetches German grocery terms from Wiktionary for use in Rhasspy.
#! /usr/bin/python3
try:
# For Python 3.0 and later
from urllib.request import urlopen
from urllib.parse import quote
except ImportError:
# Fall back to Python 2's urllib2
from urllib2 import urlopen
from urllib2 import quote
import json
def get_jsonparsed_data(url):
    """
    Fetch the content of ``url``, decode it as UTF-8 and parse it as JSON.

    Parameters
    ----------
    url : str
        Address handed to ``urlopen``.

    Returns
    -------
    object
        The decoded JSON value; a dict when the document's top level is a
        JSON object.

    Raises
    ------
    ValueError
        If the body is not valid UTF-8 or not valid JSON.
    """
    response = urlopen(url)
    # try/finally rather than ``with``: the response must always be closed,
    # and the Python 2 ``urllib2`` fallback response is not a context manager.
    try:
        data = response.read().decode("utf-8")
    finally:
        response.close()
    return json.loads(data)
# Wiktionary appendix ("Verzeichnis") pages whose outgoing links are
# collected. The titles are already percent-encoded because they are
# inserted into the query string verbatim.
appendices = [
    "Verzeichnis:Deutsch/Essen_und_Trinken/Lebensmittel",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Obst_und_Gem%C3%BCse",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Speisen",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Getr%C3%A4nke",
]
# API query listing the links of one page; {} receives the page title.
base_url = "https://de.wiktionary.org/w/api.php?format=json&action=query&titles={}&prop=links&formatversion=2&pllimit=500"
# Suffix that carries the continuation token for the next batch of links.
plcont = "&plcontinue={}"

words = []
for appendix in appendices:
    url = base_url.format(appendix)
    url2 = url  # URL of the current batch; gains a plcontinue suffix below
    while True:
        res = get_jsonparsed_data(url2)
        # A page that is missing or has no links carries no "links" key;
        # treat that as an empty batch instead of failing with KeyError.
        links = res["query"]["pages"][0].get("links", [])
        # Skip titles containing ":" -- presumably namespaced pages (such as
        # other appendices) rather than plain dictionary entries.
        words += [d["title"] for d in links if ":" not in d["title"]]
        # No "continue" member means this was the last batch for the page.
        if "continue" not in res:
            break
        url2 = url + plcont.format(quote(res["continue"]["plcontinue"]))

# Emit the collected titles on stdout, one per line.
print("\n".join(words))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment