Skip to content

Instantly share code, notes, and snippets.

@NSBum
Created December 20, 2023 21:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NSBum/9e8cc55051bfb7e6cd6b2e129b75b3e0 to your computer and use it in GitHub Desktop.
Save NSBum/9e8cc55051bfb7e6cd6b2e129b75b3e0 to your computer and use it in GitHub Desktop.
Get the wikitext of a ru.wiktionary.org entry, restricted to its Russian-language section.
import re
import requests
def get_wiktionary_russian_content(word):
    """Return the Russian-language section of a ru.wiktionary.org entry.

    Sends an ``action=query`` / ``prop=revisions`` / ``rvprop=content``
    request to the ru.wiktionary.org API for the page titled *word*, takes
    the content of the first revision of the first page in the response, and
    extracts the text that follows the ``= {{-ru-}} =`` heading up to the
    next ``= {{-`` heading at the start of a line (or the end of the text).

    Args:
        word: Title of the entry to look up.

    Returns:
        The stripped text of the Russian section, or the string
        ``"Russian content not found."`` when *word* is empty/blank, the
        response carries no page content, or the content has no Russian
        section.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    not_found = "Russian content not found."

    # A blank title cannot name a page; skip the network round-trip.
    if not word or (isinstance(word, str) and not word.strip()):
        return not_found

    url = "https://ru.wiktionary.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "titles": word,
        "prop": "revisions",
        "rvprop": "content",
    }
    # Without a timeout, requests may block forever on a stalled connection.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    data = response.json()

    # Walk the response defensively: a nonexistent page has no "revisions"
    # entry, and an error response may have no "query" at all.
    pages = data.get("query", {}).get("pages", {})
    if not pages:
        return not_found
    page = next(iter(pages.values()))  # Only one title was requested.
    revisions = page.get("revisions")
    if not revisions:
        return not_found
    content = revisions[0].get("*")
    if not content:
        return not_found

    # Capture everything after the Russian heading, stopping (lazily) at the
    # next language heading that begins a line, or at the end of the text.
    russian_section_pattern = r'= \{\{-ru-\}\} =(.+?)(?=^= \{\{-|\Z)'
    match = re.search(
        russian_section_pattern, content, re.MULTILINE | re.DOTALL
    )
    if match is None:
        return not_found
    return match.group(1).strip()
# Demo: look up the entry for "собака" and print its Russian section.
russian_content = get_wiktionary_russian_content(word="собака")
print(russian_content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment