Skip to content

Instantly share code, notes, and snippets.

@libkoi
Created June 9, 2022 13:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save libkoi/e98a2cf17ecb6fe398e4c26210d1e443 to your computer and use it in GitHub Desktop.
Save libkoi/e98a2cf17ecb6fe398e4c26210d1e443 to your computer and use it in GitHub Desktop.
import pywikibot
from tqdm import tqdm
import re
from datetime import datetime
# --- Configuration -----------------------------------------------------------

# Link targets whose titles are listed here are discarded before an article's
# internal links are counted.
BLACKLIST = ['互联网档案馆', '机器翻译', '维基百科', '人称']

# An article is counted as "matched" once it has at least this many remaining
# internal links.
MIN = 5

# --- Session -----------------------------------------------------------------

# Chinese Wikipedia, authenticated as the bot account.
site = pywikibot.Site('zh', 'wikipedia', user='Crystal-bot')
site.login()

# --- Run start ---------------------------------------------------------------

# Naive UTC timestamp; it is subtracted from a second utcnow() value at the end
# of the run, so both must stay naive.
s_curtime = datetime.utcnow()
s_curtime_ts = s_curtime.strftime('%Y-%m-%d %H:%M:%S')
print("==========")
print(f"[INFO] {s_curtime_ts} Searching articles with \"dead_end\" template.")

# Resolve the template page (namespace 10) and lazily enumerate the
# namespace-0 pages that transclude it.
template_link = pywikibot.Link('Dead_end', default_namespace=10, source=site)
template = pywikibot.Page(template_link)
gen = template.getReferences(only_template_inclusion=True, namespaces=0)

# Running totals: pages visited / pages that reached the MIN threshold.
count = matched = 0
# Visit every article yielded by `gen`, count its usable internal links and,
# when it has at least MIN of them, strip the dead-end template from its text.
#
# A link is "usable" when its target title is not in BLACKLIST and the target
# page exists.

# Hoisted out of the loop: set membership for the blacklist test and a
# pre-compiled pattern for the template removal.
_blacklisted_titles = frozenset(BLACKLIST)
# NOTE(review): the pattern requires a "|" after the template name, so a bare
# transclusion with no parameters is not matched — confirm this is intended.
_dead_end_re = re.compile(r"\{\{([Dd]ead ?end|斷[鏈連聯]頁面)\|.*?\}\}")

for page in tqdm(gen):
    count += 1

    # Filter in a single pass into a new list. The previous code called
    # list.remove() on the list it was iterating, which makes the iterator
    # skip the element following each removal; blacklisted or missing targets
    # could therefore survive and inflate the count.
    # The blacklist test comes first so exists() is not called for titles
    # that are discarded anyway.
    i_links = [
        linked
        for linked in page.linkedPages(namespaces=0)
        if linked.title() not in _blacklisted_titles and linked.exists()
    ]
    link_count = len(i_links)

    if link_count == 0:
        continue

    if link_count >= MIN:
        matched += 1
        page.text = _dead_end_re.sub("", page.text)
        # Saving is deliberately left disabled, so the change above is only
        # made on the in-memory page object. The edit summary below reads
        # "Bot: remove [[Template:Dead end]] from articles that contain
        # internal links".
        #page.save("机器人: 移除含有内部链接条目中的[[Template:Dead end]]模板")
# Final report: wall-clock duration of the run plus the visited/matched totals.
e_curtime = datetime.utcnow()
elapse = e_curtime - s_curtime  # both values are naive UTC datetimes
e_curtime_ts = e_curtime.strftime('%Y-%m-%d %H:%M:%S')

# NOTE(review): the message ends with "sleeping..." but no sleep call is
# visible here — presumably an external scheduler re-runs the script; confirm.
summary = (
    f"[INFO] {e_curtime_ts} Found {matched} articles in {count} articles, "
    f"replaced \"dead_end\" template in {elapse.total_seconds():.2f} seconds, "
    "sleeping..."
)
print(summary)
print("==========\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment