Skip to content

Instantly share code, notes, and snippets.

@JJMC89
Last active July 14, 2020 04:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JJMC89/d62c412f6fdceed02aa0b20ec728a6c6 to your computer and use it in GitHub Desktop.
Save JJMC89/d62c412f6fdceed02aa0b20ec728a6c6 to your computer and use it in GitHub Desktop.
Builds a list of rural localities by district/okrug
python3 rus_loc.py -catr:'Rural localities in Arkhangelsk Oblast'
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Builds a list of rural localities by district/okrug.
&params;
"""
# Author : JJMC89
# License: MIT
import copy
import mwparserfromhell
import pywikibot
from pywikibot.pagegenerators import GeneratorFactory, parameterHelp
from pywikibot.textlib import removeDisabledParts
# Maps the '&params;' placeholder that appears in the module docstring to
# the page-generator help text (presumably substituted by pywikibot when it
# prints the -help output -- confirm against pywikibot.handle_args).
docuReplacements = {'&params;': parameterHelp} # pylint: disable=invalid-name
# Maps an infobox template to the name of the template parameter that is
# read as the district. Keys start out as template titles; main() adds the
# pages returned by get_template_pages() for each title under the same
# parameter name, and _get_district_from_infobox() looks templates up here.
CONFIG = {
'Infobox settlement': 'subdivision_name3',
'Infobox Russian inhabited locality': 'adm_district_jur',
}
def get_template_pages(templates, site=None):
    """
    Collect existing template pages plus the pages that redirect to them.

    Every entry may be a template title (str) or a page object. A redirect
    is replaced by its target, and entries whose page does not exist are
    skipped. For each kept page, everything yielded by its
    backlinks(filterRedirects=True) call is collected as well.

    @param templates: iterable of template titles or template pages
    @type templates: iterable
    @param site: site used to build pages from titles; when omitted,
        pywikibot.Site() is used
    @rtype: set
    """
    site = site or pywikibot.Site()
    collected = set()
    for entry in templates:
        # Plain titles are turned into pages in namespace 10.
        candidate = (
            pywikibot.Page(site, entry, ns=10)
            if isinstance(entry, str)
            else entry
        )
        if candidate.isRedirectPage():
            candidate = candidate.getRedirectTarget()
        if candidate.exists():
            collected.add(candidate)
            collected.update(candidate.backlinks(filterRedirects=True))
    return collected
def get_district(page):
    """
    Get the district from the article lead.

    The infobox in the lead is consulted first; the wikilinks of the lead
    are the fallback.

    @param page: article to inspect (its text and site are read)
    @return: title of the district page, or '_Unknown_' when none is found
    @rtype: str
    """
    cleaned = removeDisabledParts(page.text, site=page.site)
    parsed = mwparserfromhell.parse(cleaned, skip_style_tags=True)
    # The lead is requested explicitly and taken as the first section.
    lead = parsed.get_sections(include_lead=True)[0]
    found = (
        _get_district_from_infobox(lead, page.site)
        or _get_district_from_links(lead, page.site)
    )
    if not found:
        return '_Unknown_'
    return found.title()
def _get_district_from_infobox(wikicode, site):
    """
    Helper function for get_district.

    Find the first template that is a key of CONFIG and has a non-empty
    value for its configured parameter, then turn that value into a page:
    a value containing '[[' is parsed and resolved through
    _get_district_from_links, any other value is used as a page title.

    @param wikicode: parsed wikitext to search for templates
    @param site: site the pages are created on
    @return: the district page, or a false value when nothing was found
    """
    raw = None
    for node in wikicode.ifilter_templates():
        key = pywikibot.Page(site, str(node.name), ns=10)
        if key not in CONFIG:
            continue
        param = CONFIG[key]
        if node.has(param, ignore_empty=True):
            raw = str(node.get(param).value)
            break
    if not raw:
        return raw
    if '[[' not in raw:
        return pywikibot.Page(site, raw)
    return _get_district_from_links(
        mwparserfromhell.parse(raw, skip_style_tags=True), site
    )
def _get_district_from_links(wikicode, site):
    """
    Helper function for get_district.

    Scan every wikilink and remember the first target whose title contains
    'district' and the first whose title contains 'okrug' (compared in
    lower case). A district match is preferred over an okrug match.

    @param wikicode: parsed wikitext to search for wikilinks
    @param site: site the pages are created on
    @return: the matching page, or None when no link matches
    """
    first_match = {'district': None, 'okrug': None}
    for link in wikicode.ifilter_wikilinks():
        target = pywikibot.Page(site, str(link.title))
        lowered = target.title().lower()
        if 'district' in lowered:
            kind = 'district'
        elif 'okrug' in lowered:
            kind = 'okrug'
        else:
            continue
        # Only the first hit of each kind is kept.
        if not first_match[kind]:
            first_match[kind] = target
    return first_match['district'] or first_match['okrug']
def main(*args):
    """
    Process command line arguments and invoke bot.

    The pages produced by the page generator options are grouped by the
    district found for each page, and the resulting wikitext list is
    written to 'rus_loc.txt' in the current working directory. Pages whose
    title contains 'list' are skipped.

    @param args: command line arguments
    @type args: list of unicode
    """
    local_args = pywikibot.handle_args(args)
    site = pywikibot.Site()
    site.login()
    gen_factory = GeneratorFactory(site)
    for arg in local_args:
        gen_factory.handleArg(arg)
    gen = gen_factory.getCombinedGenerator()
    if gen is None:
        # No page generator option was given. Show the usage hint instead
        # of failing with a TypeError when iterating over None.
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    # Register the pages found for each configured infobox title as CONFIG
    # keys, so templates can be looked up by page in
    # _get_district_from_infobox. Iterate over a copy because CONFIG is
    # modified inside the loop.
    for infobox, param in copy.copy(CONFIG).items():
        for page in get_template_pages([infobox], site):
            CONFIG[page] = param

    # district title -> set of locality pages
    storage = {}
    for page in gen:
        if 'list' in page.title().lower():
            continue
        storage.setdefault(get_district(page), set()).add(page)

    parts = []
    for district in sorted(storage):
        parts.append(
            '== {0} ==\nRural localities in [[{0}]]:\n'.format(district)
        )
        parts.append('{{div col|colwidth=15em}}\n')
        for locality in sorted(storage[district]):
            title = locality.title()
            if ',' in title or '(' in title:
                # Titles with a comma or parenthesis are emitted with an
                # empty link label ('[[Title|]]').
                parts.append('* [[{}|]]\n'.format(title))
            else:
                parts.append('* {}\n'.format(locality.title(as_link=True)))
        parts.append('{{div col end}}\n\n')

    # The context manager closes the file even if writing fails.
    with open('rus_loc.txt', 'w', encoding='utf8') as out_file:
        out_file.write(''.join(parts))
# Run the bot only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment