Skip to content

Instantly share code, notes, and snippets.

@s03311251
Last active February 8, 2024 18:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save s03311251/bb54f41186404ddd0d6627f35cef4413 to your computer and use it in GitHub Desktop.
Save s03311251/bb54f41186404ddd0d6627f35cef4413 to your computer and use it in GitHub Desktop.
Replace Wikipedia links in a given language with links in another language using the Wikipedia API
import mwparserfromhell
import sys
# Maps a German Wikipedia infobox template name to the name of its English
# counterpart. Keys are compared against the whitespace-stripped template name.
translation_title = {
    'Infobox Unternehmen': 'infobox company',
    'Infobox Connector': 'infobox connector',
}
# Per-template parameter renames: outer key is the German template name,
# inner dict maps each German parameter name to its English replacement.
# Only templates listed here are rewritten by translate_wiki_text().
translation_param = {
    'Infobox Unternehmen': {
        'Name': 'name',
        'Logo': 'logo',
        'Unternehmensform': 'type',
        'Gründungsdatum': 'founded',
        'Sitz': 'hq_location',
        'Leitung': 'key_people',
        'Mitarbeiterzahl': 'num_employees',
        'Umsatz': 'revenue',
        'Stand': 'revenue_year',
        'Branche': 'industry',
        'Homepage': 'website',
    },
    'Infobox Connector': {
        'Name': 'connector_name',
    },
}
def translate_template(template):
    """Render *template* as wikitext with translated name and parameters.

    The template name is looked up in ``translation_title`` (falling back to
    the original name) and each parameter name is looked up in the matching
    ``translation_param`` entry.  Parameters that have no mapping are omitted
    from the output.

    Args:
        template: A parsed template exposing ``name`` and ``params``, where
            each parameter exposes ``name`` and ``value`` (as produced by
            ``mwparserfromhell``).

    Returns:
        The translated template as a string, one parameter per line, with
        the ``=`` signs aligned.
    """
    original_name = template.name.strip()
    translated_template_name = translation_title.get(original_name, original_name)

    # Both the mapping and the alignment width depend only on the template,
    # so compute them once instead of once per parameter.
    template_mapping = translation_param.get(original_name, {})
    max_param_length = max(
        (len(name) for name in template_mapping.values()), default=0
    )

    lines = ["{{" + translated_template_name]
    for param in template.params:
        param_name = param.name.strip()
        if param_name not in template_mapping:
            continue
        translated_name = template_mapping[param_name]
        # The raw value carries the source's own spacing and trailing newline.
        # Strip it (MediaWiki trims named-parameter values anyway) so the
        # output layout does not depend on how the input was formatted.
        translated_value = str(param.value).strip()
        lines.append(
            f"| {translated_name.ljust(max_param_length)} = {translated_value}"
        )
    lines.append("}}")
    return "\n".join(lines)
def translate_wiki_text(wiki_text):
    """Translate every known infobox template found in *wiki_text*.

    The text is parsed with ``mwparserfromhell``; each template whose
    stripped name is a key of ``translation_param`` is replaced by the output
    of ``translate_template``.  Everything else is left untouched.

    Args:
        wiki_text: Wikitext source as a string.

    Returns:
        The wikitext with the matching templates rewritten.
    """
    wikicode = mwparserfromhell.parse(wiki_text)
    # filter_templates() is recursive and lists a parent before the templates
    # nested inside it.  Walking the list backwards rewrites inner templates
    # first, so the parent is still in the tree when its turn comes; going
    # forwards would detach the children and make replace() raise ValueError.
    for template in reversed(wikicode.filter_templates()):
        if template.name.strip() not in translation_param:
            continue
        translated_template = translate_template(template)
        wikicode.replace(template, mwparserfromhell.parse(translated_template))
    return str(wikicode)
if __name__ == "__main__":
    # Act as a filter: consume all wikitext from stdin, translate it, and
    # emit the result on stdout without adding a trailing newline.
    sys.stdout.write(translate_wiki_text(sys.stdin.read()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment