Skip to content

Instantly share code, notes, and snippets.

@libkoi
Last active June 4, 2022 16:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save libkoi/6bd3b2568904a1eebe7a9e900c5a1b01 to your computer and use it in GitHub Desktop.
Save libkoi/6bd3b2568904a1eebe7a9e900c5a1b01 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import re
import pywikibot
from pywikibot.pagegenerators import SearchPageGenerator
from tqdm import tqdm
# Run-once maintenance script: log in as the bot account, search the article
# namespace for pages whose source contains "<br />", and rewrite line-break
# markup inside two infobox parameters (``semimajor`` and ``demonym``).
site = pywikibot.Site('zh', 'wikipedia', user='Crystal-bot')
site.login()

# Alternative page source (category members instead of a search), kept for
# reference:
# cat = pywikibot.Category(site, '小行星带天体')
# pages = cat.articles()


def _search_pages():
    """Return a fresh search generator over namespace 0 for ``<br />``.

    A generator can only be consumed once, so every pass over the search
    results has to ask for its own instance.
    """
    return SearchPageGenerator("insource: \"<br />\"", site=site, namespaces=[0])


# NOTE(review): the detection regex of the first pass and the search query use
# a literal "<br", while every substitution pattern (and the detection regex of
# the second pass) uses the HTML-escaped form "&lt;br ...&gt;". As written those
# patterns only match wikitext that literally contains the escaped entities.
# Confirm whether "<" / ">" were intended before relying on this script.

# Number of pages that were actually modified and saved.
count = 0

# Pass 1: the ``semimajor`` parameter.
gen = _search_pages()
for page in tqdm(gen):
    original = page.text
    if re.search(r"semimajor *=[ \w]+km<br ?/?> ?", original, re.I) is None:
        continue
    print("Title = " + page.title())
    # First normalise the break that directly follows the "km" value, then
    # turn every remaining escaped break into a comma.
    updated = re.sub(r"semimajor( *?)=([ \w]+?)km&lt;br ?/?&gt; ?", r"semimajor\1=\2km<br />", original)
    updated = re.sub(r"&lt;br ?/?&gt; ?", r",", updated)
    if updated == original:
        # Nothing matched the substitution patterns; do not submit a no-op edit.
        continue
    page.text = updated
    # Edit summary: "Bot: clean up line breaks in asteroid articles".
    page.save("机器人: 清理小行星条目中的换行符", botflag=True)
    count += 1

# Pass 2: the ``demonym`` parameter. The generator from pass 1 is exhausted at
# this point, so a new search is started; reusing ``gen`` directly would make
# this loop a silent no-op.
gen = _search_pages()
for page in tqdm(gen):
    print("Title = " + page.title())
    original = page.text
    if re.search(r"demonym *=[\u0000-\uFFFF]+&lt;br /&gt;", original, re.I) is None:
        continue
    print("Title = " + page.title())
    # Join three break-separated values first, then the two-value case, so the
    # longer form is not partially consumed by the shorter pattern.
    updated = re.sub(r"demonym( *?)=([\u0000-\uFFFF]+?)&lt;br /&gt;([\u0000-\uFFFF]+?)&lt;br /&gt;([\u0000-\uFFFF]+?)\|", r"demonym\1=\2, \3, \4|", original)
    updated = re.sub(r"demonym( *?)=([\u0000-\uFFFF]+?)&lt;br /&gt;([\u0000-\uFFFF]+?)\|", r"demonym\1=\2, \3|", updated)
    if updated == original:
        # Nothing matched the substitution patterns; do not submit a no-op edit.
        continue
    page.text = updated
    # Edit summary: "Bot: clean up line breaks in Norwegian municipality articles".
    page.save("机器人: 清理挪威市镇条目中的换行符", botflag=True)
    count += 1

print(f"Fixed {count} articles")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment