Skip to content

Instantly share code, notes, and snippets.

@Vesihiisi
Forked from lokal-profil/check_WLE_id.py
Created May 3, 2020 12:55
Show Gist options
  • Save Vesihiisi/1f40a0fea06c2c4f4f89df36a7a974b9 to your computer and use it in GitHub Desktop.
Save Vesihiisi/1f40a0fea06c2c4f4f89df36a7a974b9 to your computer and use it in GitHub Desktop.
Short script for reproducing WLM style reporting pages for Wikidata powered competitions (here WLE in Sweden)
# -*- coding: utf-8 -*-
# python check_WLE_id.py -live -dir:~/Projects/batchUploadTools/
"""Script for updating the unused images/unknown ids pages for WLE on sv.wp."""
import pywikibot
import wikidataStuff.wdqsLookup as query
# Settings shared by every function in this script.
SETTING = {
    # Wikidata property queried as `?item wdt:<prop> ?id`.
    'prop': 'P3613',
    # URL pattern an id is inserted into to build the caption link.
    'formatter_url': 'http://skyddadnatur.naturvardsverket.se/sknat/?nvrid={}',
    # Commons category whose members are grouped by their sort key prefix.
    'cat': 'Category:Protected areas of Sweden with known IDs',
    # Wiki pages that receive the two generated reports.
    'unused_page': 'Wikipedia:Projekt naturgeografi/WLE/Oanvända bilder',
    'unknown_page': 'Wikipedia:Projekt naturgeografi/WLE/Ogiltiga id',
    # Subpage transcluded at the top of each generated report.
    'intro_subpage': 'intro',
    # Caption used in place of an empty id.
    'no_id_caption': '<no id>',
    # Language and family of the wiki the reports are saved to.
    'lang': 'sv',
    'project': 'wikipedia',
    'edit_summary': 'Automatisk uppdatering av gallerier',
    # Lower-cased sort keys that are never reported as unknown ids.
    # NOTE(review): these look like lower-cased Wikidata Q-ids - confirm.
    'biospheres': [
        "q1525232",
        "q3373690",
        "q22680883",
        "q22680881",
        "q22806736",
        "q85493403",
        "q86189513",
    ],
}
# retrieve info from Wikidata
def process_query_data(raw_data):
    """Index the raw rows from query.make_select_wdqs_query by their id.

    Every row must provide 'id' and 'item' and may provide 'P18'. The
    returned dict maps each id to {'image': ..., 'qid': ...}, where 'image'
    tells whether the row had a truthy 'P18' value and 'qid' is the 'item'
    value with the leading Wikidata entity URL cut off.

    If an id is seen again with a different qid a warning is emitted; in
    every case the most recent row for an id replaces the earlier one.
    """
    prefix_length = len('http://www.wikidata.org/entity/')
    by_id = {}
    for row in raw_data:
        identifier = row['id']
        has_image = bool(row.get('P18'))
        qid = row['item'][prefix_length:]
        if identifier in by_id and by_id[identifier]['qid'] != qid:
            pywikibot.warning('duplicate idno for {}'.format(identifier))
        by_id[identifier] = {'image': has_image, 'qid': qid}
    return by_id
def quick_output(output_data):
    """Format the data as plain text for the command line.

    Each key produces one heading line: the key itself (or the
    'no_id_caption' setting when the key is falsy), followed by the qid in
    parentheses when the entry has one. Below the heading every image is
    listed on its own tab-indented line.
    """
    pieces = []
    for key, entry in output_data.items():
        heading = key or SETTING.get('no_id_caption')
        qid = entry.get('qid')
        if qid:
            heading += ' ({})'.format(qid)
        pieces.append('{}\n'.format(heading))
        pieces.append('\t{}\n'.format('\n\t'.join(entry.get('images'))))
    return ''.join(pieces)
def wikitext_output(output_data):
    """
    Render the data as wikitext galleries meant to be read by volunteers.

    The text starts by transcluding the intro subpage; anything that is not
    a gallery (categories, instructions) belongs on that subpage.

    Each key then gets a level-3 heading followed by a <gallery> holding its
    images. The heading is an external link built from the 'formatter_url'
    setting, or the 'no_id_caption' setting when the key is falsy, with a
    {{Q|...}} template appended when the entry has a qid.
    """
    url_pattern = SETTING.get('formatter_url')
    parts = ['{{/%s}}\n\n' % SETTING.get('intro_subpage')]
    # a stable key order keeps the diff between page revisions small
    for key in sorted(output_data):
        entry = output_data[key]
        if key:
            heading = '[{url} {id}]'.format(
                url=url_pattern.format(key), id=key)
        else:
            heading = SETTING.get('no_id_caption')
        qid = entry.get('qid')
        if qid:
            heading += ' - {{Q|%s}}' % qid
        parts.append('==={}===\n'.format(heading))
        parts.append('<gallery>\n')
        parts.append('\n'.join(entry.get('images')))
        parts.append('\n</gallery>\n\n')
    return ''.join(parts)
def load_and_process():
    """Fetch items and images, then pick out unknown ids and unused images.

    Returns a tuple (unknown_ids, unused_images). Both are dicts keyed by
    the sort key prefix of the Commons category members, with values of the
    form {'images': [titles], 'qid': ...}:

    * unknown_ids holds keys that match no id from Wikidata and whose
      lower-cased form is not listed in the 'biospheres' setting; their
      'qid' is None.
    * unused_images holds keys that match an id from Wikidata whose item
      has no image; their 'qid' is that item's qid.
    """
    # Wikidata: every item carrying the configured property
    raw_rows = query.make_select_wdqs_query(
        '?item wdt:{} ?id'.format(SETTING.get('prop')),
        optional_props=['P18'], select_value='id', allow_multiple=True,
        raw=True)
    wle_items = process_query_data(raw_rows)

    # Commons: group the category members by their sort key prefix
    commons = pywikibot.Site('commons', 'commons')
    members = pywikibot.data.api.QueryGenerator(
        site=commons, list='categorymembers',
        cmprop='title|sortkeyprefix', cmtitle=SETTING.get('cat'))
    titles_by_key = {}
    for member in members:
        titles_by_key.setdefault(
            member['sortkeyprefix'], []).append(member['title'])

    # sort each group into at most one of the two reports
    exempt_keys = SETTING["biospheres"]
    unknown_ids = {}
    unused_images = {}
    for key, titles in titles_by_key.items():
        if key in wle_items:
            item = wle_items[key]
            if not item['image']:
                unused_images[key] = {'images': titles, 'qid': item['qid']}
        elif key.lower() not in exempt_keys:
            unknown_ids[key] = {'images': titles, 'qid': None}
    return (unknown_ids, unused_images)
def update_pages(unknown_ids, unused_images):
    """Write the unknown ids report and then the unused images report.

    Both pages live on the wiki selected by the 'lang' and 'project'
    settings; their titles come from the 'unknown_page' and 'unused_page'
    settings and their text from wikitext_output.
    """
    wiki = pywikibot.Site(SETTING.get('lang'), SETTING.get('project'))
    reports = (
        ('unknown_page', unknown_ids),
        ('unused_page', unused_images),
    )
    for title_setting, report_data in reports:
        target = pywikibot.Page(wiki, SETTING.get(title_setting))
        update_single_page(target, wikitext_output(report_data))
def update_single_page(page, content):
    """Update a single wikipage.

    Replaces the text of `page` with `content`, using the 'edit_summary'
    setting as the edit summary. Problems are reported as warnings rather
    than raised: nothing is saved when `page` is not a pywikibot.Page, and
    the two page-save errors caught below are logged and swallowed.

    @param page: the pywikibot.Page to overwrite
    @param content: the new wikitext of the page
    """
    if not isinstance(page, pywikibot.Page):
        pywikibot.warning(
            'Could not save page {0} because it is not a Page '
            'instance.'.format(page))
        # stop here: without this the code below would go on to call
        # .put() on an object that was just reported as not being a Page
        return

    summary = SETTING.get('edit_summary')
    try:
        page.put(newtext=content, summary=summary)
    except (pywikibot.exceptions.OtherPageSaveError,
            pywikibot.exceptions.PageSaveRelatedError) as e:
        pywikibot.warning(
            'Could not save page {0} ({1}): {2}'.format(page, summary, e))
def handle_args(args):
    """Pass the arguments through pywikibot and look for the -live flag.

    pywikibot.handle_args deals with the generic pywikibot arguments; what
    it returns is scanned for the local '-live' option (anything after a
    ':' is ignored). Returns True if the option was given, otherwise False.
    """
    local_args = pywikibot.handle_args(args)
    return any(arg.partition(':')[0] == '-live' for arg in local_args)
def main(*args):
    """Build both reports, then save them on-wiki or print them.

    With the -live argument the wiki pages are updated; without it the two
    reports are written to the pywikibot output, separated by a dashed line,
    and nothing is saved.
    """
    live = handle_args(args)
    unknown_ids, unused_images = load_and_process()
    if not live:
        # dry run: only show what would have been reported
        pywikibot.output(quick_output(unknown_ids))
        pywikibot.output('-------------------------------------')
        pywikibot.output(quick_output(unused_images))
        return
    update_pages(unknown_ids, unused_images)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment