-
-
Save libkoi/6bd3b2568904a1eebe7a9e900c5a1b01 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""Replace ``<br />`` line breaks with commas in zh.wikipedia articles.

The script logs in as a bot account, searches the article namespace for
pages whose source contains ``<br />`` and runs two independent clean-up
passes over the results:

1. pages that have a ``semimajor = ... km<br />`` field;
2. pages that have a ``demonym = ...<br />`` field.

Each pass saves the pages it changed with its own edit summary, and the
total number of saved pages is printed at the end.
"""
import re

import pywikibot
from pywikibot.pagegenerators import SearchPageGenerator
from tqdm import tqdm

# Search expression shared by both passes.
SEARCH_QUERY = "insource: \"<br />\""

# --- pass 1: "semimajor" field ---------------------------------------------
# Detection is case-insensitive; the rewrite patterns below are not.
SEMIMAJOR_DETECT = re.compile(r"semimajor *=[ \w]+km<br ?/?> ?", re.I)
SEMIMAJOR_BREAK = re.compile(r"semimajor( *?)=([ \w]+?)km<br ?/?> ?")
ANY_BREAK = re.compile(r"<br ?/?> ?")

# --- pass 2: "demonym" field -----------------------------------------------
# [\u0000-\uFFFF] matches any BMP character, newlines included.
DEMONYM_DETECT = re.compile(r"demonym *=[\u0000-\uFFFF]+<br />", re.I)
DEMONYM_THREE_PARTS = re.compile(
    r"demonym( *?)=([\u0000-\uFFFF]+?)<br />"
    r"([\u0000-\uFFFF]+?)<br />([\u0000-\uFFFF]+?)\|"
)
DEMONYM_TWO_PARTS = re.compile(
    r"demonym( *?)=([\u0000-\uFFFF]+?)<br />([\u0000-\uFFFF]+?)\|"
)


def _clean_semimajor(text):
    """Return *text* with its ``<br>`` variants replaced by commas.

    The break following the ``semimajor`` value is first normalised to
    ``<br />``; afterwards every ``<br>``, ``<br/>`` or ``<br />`` (plus one
    optional trailing space) in the text becomes ``,``.
    """
    text = SEMIMAJOR_BREAK.sub(r"semimajor\1=\2km<br />", text)
    # NOTE(review): this substitution is page-wide, not limited to the
    # semimajor field. Kept as in the original script -- confirm that
    # rewriting every <br> on the page is really intended.
    return ANY_BREAK.sub(r",", text)


def _clean_demonym(text):
    """Return *text* with the breaks after ``demonym =`` replaced by ``, ``.

    A value made of three ``<br />``-separated parts is handled first, then
    a value made of two parts.
    """
    # NOTE(review): the character class also matches newlines and "|", so a
    # match may extend beyond the demonym field itself -- verify on real
    # pages before a large run.
    text = DEMONYM_THREE_PARTS.sub(r"demonym\1=\2, \3, \4|", text)
    return DEMONYM_TWO_PARTS.sub(r"demonym\1=\2, \3|", text)


def _run_pass(site, detect, clean, summary):
    """Run one clean-up pass and return the number of pages saved.

    :param site: logged-in pywikibot site to search and edit
    :param detect: compiled pattern a page must match to be processed
    :param clean: function mapping the old page text to the new page text
    :param summary: edit summary passed to ``page.save``
    """
    saved = 0
    # A new generator is created for every pass: the object returned by
    # SearchPageGenerator is consumed by iteration, so sharing one between
    # the two passes would leave nothing for the second pass.
    pages = SearchPageGenerator(SEARCH_QUERY, site=site, namespaces=[0])
    for page in tqdm(pages):
        old_text = page.text
        if detect.search(old_text) is None:
            continue
        new_text = clean(old_text)
        if new_text == old_text:
            # Detection ignores case but the rewrite does not, so a match
            # does not guarantee a change; skip instead of counting and
            # saving an unchanged page.
            continue
        print("Title = " + page.title())
        page.text = new_text
        page.save(summary, botflag=True)
        saved += 1
    return saved


site = pywikibot.Site('zh', 'wikipedia', user='Crystal-bot')
site.login()

# Alternative page source that was left disabled in the original script:
# cat = pywikibot.Category(site, '小行星带天体')
# pages = cat.articles()

count = 0
count += _run_pass(
    site, SEMIMAJOR_DETECT, _clean_semimajor,
    "机器人: 清理小行星条目中的换行符",
)
count += _run_pass(
    site, DEMONYM_DETECT, _clean_demonym,
    "机器人: 清理挪威市镇条目中的换行符",
)
print(f"Fixed {count} articles")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment