Last active
May 20, 2018 04:32
-
-
Save hesyifei/00f6ee0890ac3477b58e4d6b9c712fc2 to your computer and use it in GitHub Desktop.
Pywikibot Script: Remove deprecated {{Persondata}} templates for Chinese Wikipedia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
# https://zh.wikipedia.org/wiki/User:Eflybot/task/1 | |
from __future__ import absolute_import, unicode_literals | |
import pywikibot | |
from pywikibot.tools.formatter import color_format | |
from requests import get | |
import re | |
import time | |
import mwparserfromhell | |
def get_qnumber(wikiarticle, wikisite):
    """Look up the Wikidata entity key for a page title on a given wiki.

    Issues a ``wbgetentities`` request to the Wikidata API with an empty
    ``props`` value and returns the first key of the ``entities`` object
    in the JSON reply.

    :param wikiarticle: page title, sent as the ``titles`` parameter.
    :param wikisite: site identifier, sent as the ``sites`` parameter
        (the caller in this file passes ``"zhwiki"``).
    :return: first key of ``entities`` in the response.
        NOTE(review): the caller treats ``'-1'`` as "no item linked";
        presumably that is the key the API uses for an unknown title --
        confirm against the API documentation.
    :raises requests.HTTPError: if the API answers with an HTTP error
        status.
    :raises KeyError: if the reply has no ``entities`` member (for
        example an API-level error payload).
    """
    response = get(
        'https://www.wikidata.org/w/api.php',
        params={
            'action': 'wbgetentities',
            'titles': wikiarticle,
            'sites': wikisite,
            'props': '',
            'format': 'json',
        },
        # Without a timeout a stalled connection would block the bot
        # run indefinitely.
        timeout=30,
    )
    # Fail on HTTP errors here rather than while decoding an error page.
    response.raise_for_status()
    entities = response.json()['entities']
    return list(entities)[0]
def ReferringPageGenerator(referredPage, followRedirects=False,
                           withTemplateInclusion=True,
                           onlyTemplateInclusion=False,
                           total=None, content=False):
    """Return the pages that refer to ``referredPage``.

    Thin adapter: every option is forwarded unchanged to
    ``referredPage.getReferences`` and whatever that call returns is
    handed straight back to the caller.

    :param referredPage: object exposing ``getReferences``.
    :param followRedirects: forwarded as ``follow_redirects``.
    :param withTemplateInclusion: forwarded under the same name.
    :param onlyTemplateInclusion: forwarded under the same name.
    :param total: forwarded under the same name.
    :param content: forwarded under the same name.
    """
    # Only the redirect flag is spelled differently on the target method.
    reference_options = dict(
        follow_redirects=followRedirects,
        withTemplateInclusion=withTemplateInclusion,
        onlyTemplateInclusion=onlyTemplateInclusion,
        total=total,
        content=content,
    )
    return referredPage.getReferences(**reference_options)
# ---------------------------------------------------------------------------
# Bot run: remove {{Persondata}} from main-namespace pages that transclude it
# and whose Wikidata item has an 'enwiki' sitelink.
# ---------------------------------------------------------------------------

# Each {{Persondata}} call is first swapped for this unlikely-to-collide
# placeholder, so that a single regex pass can delete the template together
# with the newlines that follow it.
PERSONDATA_PLACEHOLDER = (
    "{{MY_TEMPORARY_TEMPLATE_PLZ_DONT_REPEAT_314159265358}}")

# Matches the placeholder, or the marker comment that this script also
# removes, plus any run of newlines directly after it. Compiled once, with
# the placeholder's braces escaped explicitly.
PERSONDATA_CLEANUP_RE = re.compile(
    r'(?:' + re.escape(PERSONDATA_PLACEHOLDER) +
    r'|<!-- Metadata: see \[\[Wikipedia:Persondata\]\] -->)\n*')

site = pywikibot.Site()
page = pywikibot.Page(pywikibot.Link('Persondata',
                                     defaultNamespace=10,
                                     source=site))
gen = ReferringPageGenerator(page, onlyTemplateInclusion=True)
site.login()

# Loop-invariant: one repository handle serves every item lookup.
wikidatasite = pywikibot.Site("wikidata", "wikidata")
repo = wikidatasite.data_repository()

# Number of pages selected for editing so far; shown in the progress line.
# (For a trial run, break out of the loop once this reaches a small limit.)
count = 0
for page in gen:
    print("\n===================\n")

    # `jump` stays True unless every eligibility test below passes.
    jump = True
    # Test the namespace first so that pages which would be skipped anyway
    # do not cost a Wikidata HTTP request.
    if page.namespace() == 0:
        qNumber = get_qnumber(wikiarticle=page.title(), wikisite="zhwiki")
        if qNumber not in (-1, '-1'):
            item = pywikibot.ItemPage(repo, qNumber)
            item.get()
            if 'enwiki' in item.sitelinks:
                print('Have English Wikipedia article (Wikidata ID ' +
                      qNumber + '). Adding...')
                count += 1
                jump = False

    if jump:
        print("Will not update: " + page.title())
        continue

    text = page.get()
    wikicode = mwparserfromhell.parse(text)
    for template in wikicode.filter_templates():
        if template.name.matches("Persondata"):
            wikicode.replace(template, PERSONDATA_PLACEHOLDER)
    text = PERSONDATA_CLEANUP_RE.sub('', str(wikicode))

    # Nothing matched (no direct {{Persondata}} call and no marker comment):
    # skip the delay and the pointless save.
    if text == page.text:
        print("Nothing to remove: " + page.title())
        continue

    pywikibot.output(color_format(
        '\n>>> {lightpurple}{0}{default} <<<',
        "No. " + str(count) + ": " + page.title()))
    pywikibot.showDiff(page.text, text)
    # Pause after showing the diff and before the edit is saved.
    time.sleep(3)

    page.text = text
    print("Start saving...")
    page.save(
        summary="[[User:Eflybot/task/1|機器人]]移除過時的[[Wikipedia:個人資訊|Persondata模板]]")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment