Last active
June 10, 2021 06:37
-
-
Save Xzonn/1d02239c13f0d4029094f04f08379484 to your computer and use it in GitHub Desktop.
FixPageLinkNames
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MediaWiki.py | |
Data.py | |
__pycache__ | |
in.txt | |
out.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- coding: UTF-8 -*- | |
import requests | |
import time, json, os | |
from MediaWiki import MediaWikiApi | |
from Data import DATA | |
from FixPageLinkNamesData import replaceSummary | |
from FixPageLinkNamesFunction import FixPageLinkNames | |
# Run relative to the directory that contains this script so the title and
# data files opened below are found whatever the caller's working directory
# is.  The path is made absolute first: when __file__ is a bare file name,
# os.path.dirname() returns "" and os.chdir("") raises FileNotFoundError.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Key into DATA selecting the keyword arguments for MediaWikiApi.
WEBSITE = "moegirl-bot"
# Set to True to route requests through the local proxy below.
PROXY = False
# Number of attempts made by each retry loop in this script.
MAX_TRIES = 50
# Value passed to time.sleep() between attempts and after each success.
SLEEP = 5

if PROXY:
    proxies = {
        "http": "http://127.0.0.1:10809/",
        "https": "http://127.0.0.1:10809/",
    }
else:
    proxies = {}

# Not read anywhere in the visible part of this script; kept in case other
# code relies on them.
cookies = None
csrfToken = None

mwa = MediaWikiApi(**DATA[WEBSITE], proxies = proxies, timeout = 1200)
def Login():
    """Log in through ``mwa``, retrying up to ``MAX_TRIES`` times.

    An attempt counts as successful when ``mwa.login()`` returns a truthy
    value and ``mwa.cookies`` is truthy afterwards.  The function sleeps
    ``SLEEP`` after every attempt, successful or not.

    Returns:
        bool: True once an attempt succeeds, False when every attempt
        failed.  (Earlier versions returned None; callers that ignore the
        result are unaffected.)
    """
    for _ in range(MAX_TRIES):
        try:
            loggedIn = bool(mwa.login() and mwa.cookies)
        except Exception:
            # Best-effort retry: any API/network error just costs one try.
            # Only Exception is caught so Ctrl+C / SystemExit still stop
            # the script instead of being swallowed.
            loggedIn = False
        time.sleep(SLEEP)
        if loggedIn:
            return True
    return False
# Load the work list: one page title per line.  Blank lines (for example the
# one produced by a trailing newline) are dropped so an empty title is never
# requested from the API; duplicates are removed and the list is sorted.
titleFile = "FixPageLinkNamesTitle.txt"
with open(titleFile, "r", -1, "utf-8") as f:
    titles = sorted({line for line in f.read().split("\n") if line})
# Write the normalised list back before any edit is attempted.
with open(titleFile, "w", -1, "utf-8") as f:
    f.write("\n".join(titles))

Login()
for i, title in enumerate(titles):
    # Step 1: fetch the page, retrying up to MAX_TRIES times.
    ori = None
    for _ in range(MAX_TRIES):
        try:
            ori = mwa.getPageContent(title, redirects=1)
            fetched = bool(ori) and "content" in ori[0]
        except Exception:
            # Best-effort retry; Ctrl+C is deliberately not caught.
            fetched = False
        time.sleep(SLEEP)
        if fetched:
            break
    else:
        print("GetContentErr. Title:", title)
        continue

    page = ori[0]
    # Nothing to fix on an empty page.  (The original test was
    # ``len(...) < 0``, which can never be true.)
    if not page["content"]:
        continue

    # Step 2: apply the replacement rules and skip pages they do not change.
    edi = FixPageLinkNames(page["content"])
    if edi == page["content"]:
        print("No Changes. Title:", title)
        continue

    # Step 3: save the page, retrying up to MAX_TRIES times.
    r = None            # last API response; stays None if every call raised
    lastError = None    # last exception raised by the edit call, if any
    for _ in range(MAX_TRIES):
        try:
            r = mwa.editPageContent(page["title"], edi, replaceSummary, tags="Bot")
            succeeded = bool(r) and "error" not in r
            if r and not succeeded and r["error"]["code"] == "permissiondenied":
                # Log in again before the next attempt.
                Login()
        except Exception as err:
            succeeded = False
            lastError = err
        if succeeded:
            print("Edit", r.get("title", page["title"]), r.get("result"), "(%d/%d)" % (i + 1, len(titles)))
            # Persist progress: only the titles after this one remain.
            # NOTE(review): titles that failed or were unchanged earlier in
            # this run are dropped from the file here as well - confirm that
            # this is intended.
            with open(titleFile, "w", -1, "utf-8") as f:
                f.write("\n".join(titles[i + 1:]))
            time.sleep(SLEEP)
            break
        time.sleep(SLEEP)
    else:
        # Every attempt failed; report whatever is known about the failure.
        # ``r`` is checked for None first: the original accessed it
        # unconditionally and could raise NameError/TypeError here.
        if r and "error" in r:
            print("EditErr. Code: " + r["error"]["code"] + ". Info: " + r["error"]["info"])
        elif lastError is not None:
            print("EditErr. Title:", title, "Exception:", repr(lastError))
        else:
            print("EditErr. Title:", title)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- coding: UTF-8 -*- | |
import re | |
# Rule-set version; rendered into the edit summary.
VERSION = 15

# Every rule is a list: [pattern, replacement] or [pattern, replacement, flags].
# The rules in replaceGeneralList are applied to the whole page text, in order.
#
# NOTE(review): several character classes below contain the same ASCII
# character twice (e.g. "[//]", "[!!]", "[22]", "[&&...]", "[ _::]").  That
# looks like full-width variants that were normalised somewhere along the
# way - confirm against the author's copy.  They are reproduced as found.

# Pokemon
_pokemonRules = [
    [r"(神奇宝贝|神奇寶貝|宠物小精灵|寵物小精靈|口袋妖怪|精灵宝可梦|精靈寶可夢)", r"宝可梦"],
    # NOTE(review): the second alternative "太陽/[//]" has an extra "/"
    # compared with the first one - verify.
    [r"(宝可梦|寶可夢)[_ ](太阳[//]|太陽/[//]|Let|究极|究極)", r"精灵宝可梦 \2"],
    [r"\[\[宝可梦\]\]", r"[[宝可梦系列|宝可梦]]"],
    [r"\[\[宝可梦\|", r"[[宝可梦系列|"],
    [r"(宝可梦|寶可夢)[ _]?(起源|THE ORIGIN)", r"宝可梦 THE ORIGIN", re.I],
    [r"(宝可梦|寶可夢)(\(2019\)|[ _]旅途\|(寶可夢|宝可梦))", r"宝可梦 旅途"],
    [r"(宝可梦|寶可夢)[ _]?世代", r"宝可梦世代"],
    [r"(宝可梦|寶可夢)[ _]?XY", r"宝可梦 XY"],
    [r"宝可梦 XY[ _]?&(amp;)?[ _]Z", r"宝可梦 XY&Z"],
    [r"(宝可梦|寶可夢)[ _]?(超级愿望|超級願望|Best[ _]?Wishes)[!!]*", r"宝可梦 超级愿望", re.I],
    [r"宝可梦 超级愿望[ _]?(season[ _]?2|第[22].)", r"宝可梦 超级愿望 第2季", re.I],
    [r"(?<=第2季、).*?(season[ _]?2|第[22].)", r"第2季", re.I],
    [r"(宝可梦|寶可夢)[ _]?(钻石|鑽石)[&&与與]珍珠", r"宝可梦 钻石&珍珠"],
    [r"(宝可梦|寶可夢)[ _]?超世代", r"宝可梦 超世代"],
    [r"(?:宝可梦|寶可夢)系列\|((?:宝可梦|寶可夢) (?:XY|超级愿望|钻石&珍珠|超世代|THE ORIGIN))", r"\1|\1"],
    [r"(?:宝可梦|寶可夢)系列\|((?:宝可梦|寶可夢)(?:世代))", r"\1|\1"],
    [r"\[\[宝可梦系列\]\](|》)系列", r"[[宝可梦系列|宝可梦]]\1系列"],
    [r"宝可梦系列\|数码", r"数码宝贝系列|数码"],
    [r"(莉莉艾|水莲|玛奥|水蓮|瑪奧)\((動畫|动画)\)", r"\1"],
    [r"(宝可梦|寶可夢)\(歌曲\)", r"宠物小精灵(歌曲)"],
    [r"game *freak", r"GAME FREAK", re.I],
    [r"(宝可梦|寶可夢)/", r"宝可梦系列/"],
    [r"(宝可梦|寶可夢)(特別篇|特别篇)", r"精灵宝可梦特别篇"],
    [r"宝可梦百科", r"神奇宝贝百科"],
    [r"宝可梦吧", r"口袋妖怪吧"],
]

# Game console names
_consoleRules = [
    [r"\[\[Xbox[ _]?(360|One)娘?(?=[\|\]])", r"[[Xbox \1", re.I],
    [r"X[Bb][Oo][Xx](360|One|Series)", r"Xbox \1"],
    [r"\[\[PlayStation[ _]?5娘?(?=[\|\]])", r"[[PlayStation 5", re.I],
    [r"\[\[PS([2345])(?=娘?[\|\]])", r"[[PlayStation \1"],
    [r"\[\[PSP(?=娘?[\|\]])", r"[[PlayStation Portable"],
    [r"\[\[PSV(?:[Ii][Tt][Aa])?(?=娘?[\|\]])", r"[[PlayStation Vita"],
    [r"PlayStation([2345]|Portable|Vita)", r"PlayStation \1", re.I],
    [r"\[\[3DS娘?(?=[\|\]])", r"[[Nintendo 3DS", re.I],
    [r"\[\[N(?:intendo[ _]?)?DS娘?(?=[\|\]])", r"[[Nintendo DS", re.I],
    [r"\[\[G(?:ame[ _]?)?B(?:oy[ _]?)?A(?:dvance)?娘?(?=[\|\]])", r"[[Game Boy Advance", re.I],
    [r"\[\[G(?:ame[ _]?)?B(?:oy[ _]?)?C(?:olor)?娘?(?=[\|\]])", r"[[Game Boy Color", re.I],
    [r"\[\[G(?:ame[ _]?)?B(?:oy[ _]?)?娘?(?=[\|\]])", r"[[Game Boy", re.I],
    [r"Game[ _]?[Bb]oy", r"Game Boy"],
    [r"\[\[Wii[ _]?U娘?(?=[\|\]])", r"[[Wii U", re.I],
    [r"WiiU", r"Wii U"],
    [r"\[\[Wii[ _]?娘?(?=[\|\]])", r"[[Wii", re.I],
    # NOTE(review): "C(?:ub)?" presumably was meant to be "C(?:ube)?" -
    # as written it does not match a link spelled "...Cube".  Verify.
    [r"\[\[N(?:intendo[ _]?)?G(?:ame[ _]?)?C(?:ub)?娘?(?=[\|\]])", r"[[Nintendo GameCube", re.I],
    [r"Game[ _]?[Cc]ube", r"GameCube"],
    [r"\[\[N(?:intendo[ _]?)?64娘?(?=[\|\]])", r"[[Nintendo 64", re.I],
    [r"\[\[S(?:uper[ _]?)?F(?:ami[ _]?)?C(?:om)?娘?(?=[\|\]])", r"[[Super Famicom", re.I],
    [r"Super Family Computer", r"Super Famicom", re.I],
    [r"\[\[F(?:amily[ _]?)?C(?:omputer)?娘?(?=[\|\]])", r"[[Family Computer", re.I],
    [r"任天堂[ _]?(GameCube|Switch|DS|3DS)", r"Nintendo \1", re.I],
]

# Nintendo-related titles
_nintendoRules = [
    [r"超级马里奥", "超级马力欧"],
    [r"(马里奥|马力欧)赛车", "马力欧卡丁车"],
    [r"(塞尔达|塞爾達|薩爾達|萨尔达)[传傳伝][说說説][ _::](时之笛|時光之笛)", "塞尔达传说 时光之笛"],
    [r"(塞尔达|塞爾達|薩爾達|萨尔达)[传傳伝][说說説][ _::](梦见岛|夢見島)", "塞尔达传说 织梦岛"],
    [r"(塞尔达|塞爾達|薩爾達|萨尔达)[传傳伝][说說説][ _::](天空之剑|天空之劍)", "塞尔达传说 御天之剑"],
    [r"(塞尔达|塞爾達|薩爾達|萨尔达)[传傳伝][说說説][ _::].*?(假面|面具)", "塞尔达传说 魔吉拉的面具"],
    [r"火焰纹章", "火焰之纹章"],
    [r"(马力欧|塞尔达传说|火焰之纹章)[ _::]", r"\1 "],
]

# Detective Conan movies: each rule rewrites a whole [[...]] link whose text
# starts with the series name and contains the movie's keyword.
_conanRules = [
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(摩天楼|摩天樓|摩天大楼|摩天大樓)\]\]", "[[名侦探柯南/剧场版/计时引爆摩天楼|名侦探柯南 计时引爆摩天楼]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(个目标|個目標)\]\]", "[[名侦探柯南/剧场版/第14个目标|名侦探柯南 第14个目标]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(世纪末|世紀末)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/世纪末的魔术师|名侦探柯南 世纪末的魔术师]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(暗杀者|暗殺者)\]\]", "[[名侦探柯南/剧场版/瞳孔中的暗杀者|名侦探柯南 瞳孔中的暗杀者]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(倒计时|倒計時|倒数计时|倒數計時)\]\]", "[[名侦探柯南/剧场版/通往天国的倒计时|名侦探柯南 通往天国的倒计时]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(贝克街|貝克街)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/贝克街的亡灵|名侦探柯南 贝克街的亡灵]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(迷宫|迷宮)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/迷宫的十字路口|名侦探柯南 迷宫的十字路口]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(银翼|銀翼)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/银翼的魔术师|名侦探柯南 银翼的魔术师]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(水平线|水平線)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/水平线上的阴谋|名侦探柯南 水平线上的阴谋]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(镇魂歌|鎮魂歌)\]\]", "[[名侦探柯南/剧场版/侦探们的镇魂歌|名侦探柯南 侦探们的镇魂歌]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(绀碧|紺碧|蔚蓝|蔚藍)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/绀碧之棺|名侦探柯南 绀碧之棺]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(战栗|戰慄)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/战栗的乐谱|名侦探柯南 战栗的乐谱]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(漆黑的)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/漆黑的追踪者|名侦探柯南 漆黑的追踪者]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(天空的)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/天空的遇难船|名侦探柯南 天空的遇难船]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(沉默的|沈默的)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/沉默的15分钟|名侦探柯南 沉默的15分钟]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(前锋|前鋒)\]\]", "[[名侦探柯南/剧场版/第11个前锋|名侦探柯南 第11个前锋]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(海的侦|海的偵)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/绝海的侦探|名侦探柯南 绝海的侦探]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(狙击手|狙擊手)\]\]", "[[名侦探柯南/剧场版/异次元的狙击手|名侦探柯南 异次元的狙击手]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(向日葵)\]\]", "[[名侦探柯南/剧场版/业火的向日葵|名侦探柯南 业火的向日葵]]"],
    # NOTE(review): unlike the other rules, the keyword list here does not
    # include the spelling used in the replacement ("噩梦") - verify.
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(噩夢|恶梦|惡夢)\]\]", "[[名侦探柯南/剧场版/纯黑的噩梦|名侦探柯南 纯黑的噩梦]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(恋歌|戀歌)\]\]", "[[名侦探柯南/剧场版/唐红的恋歌|名侦探柯南 唐红的恋歌]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(执行人|執行人)\]\]", "[[名侦探柯南/剧场版/零的执行人|名侦探柯南 零的执行人]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(绀青|紺青)[^\n\]\[]+\]\]", "[[名侦探柯南/剧场版/绀青之拳|名侦探柯南 绀青之拳]]"],
    [r"\[\[(名侦探柯南|名偵探柯南)[^\n\]\[]+(子弹|子彈|弹丸|彈丸)\]\]", "[[名侦探柯南/剧场版/绯色的子弹|名侦探柯南 绯色的子弹]]"],
]

# Other
_otherRules = [
    [r"Monark", "罪恶王权", re.M],
]

# Final clean-up: collapse [[X|X]] into [[X]].
_cleanupRules = [
    [r"\[\[([^\|]+)\|\1\]\]", r"[[\1]]"],
]

replaceGeneralList = (
    _pokemonRules
    + _consoleRules
    + _nintendoRules
    + _conanRules
    + _otherRules
    + _cleanupRules
)
# The rules below are applied only inside certain sections of a page.
# Each entry has:
#   "start": pattern matching the heading line that opens the section,
#   "end":   zero-width pattern matching where the next heading line begins,
#   "list":  rules ([pattern, replacement] or [pattern, replacement, flags])
#            applied to the text between the two.

# Position of the next "== heading ==" style line; shared by every section.
_nextHeading = re.compile(r"(?=^(=+)[^=\n]+\1)", re.M)

# TV anime sections
_animeSection = {
    "start": re.compile(r"^(=+) *(电视|電視|TV)(动画|動畫) *\1", re.M),
    "end": _nextHeading,
    "list": [
        [r"《\[\[(宝可梦|寶可夢)系列(?:\|(宝可梦|寶可夢))?\]\]》", "《[[宝可梦(动画)|宝可梦]]》"],
        [r"《\[\[(名侦探柯南|名偵探柯南)\]\]》", "《[[名侦探柯南/电视动画|名侦探柯南]]》"],
        [r"《\[\[(名侦探柯南|名偵探柯南)\|", "《[[名侦探柯南/电视动画|"],
    ],
}

# OVA sections
_ovaSection = {
    "start": re.compile(r"^(=+) *OVA *\1", re.M),
    "end": _nextHeading,
    "list": [
        [r"《\[\[(名侦探柯南|名偵探柯南)\|", "《[[名侦探柯南/OVA|"],
    ],
}

# Movie sections
_movieSection = {
    "start": re.compile(r"^(=+) *(剧场版|劇場版)(动画|動畫) *\1", re.M),
    "end": _nextHeading,
    "list": [
        [r"《\[\[(名侦探柯南|名偵探柯南)\|", "《[[名侦探柯南/剧场版|"],
    ],
}

replaceSectionPatterns = {
    "anime": _animeSection,
    "ova": _ovaSection,
    "movie": _movieSection,
}
# Summary string the main script passes along with each edit.  VERSION is
# rendered as zero-padded, three-digit uppercase hexadecimal.
replaceSummary = f"机器人:修正作品译名及内链 [V:{VERSION:03X} | [[User:Xzonn|U:Xzonn]] | [[User talk:Xzonn|反馈]]]"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- coding: UTF-8 -*- | |
import re | |
from FixPageLinkNamesData import replaceGeneralList, replaceSectionPatterns | |
def FixPageLinkNames(edi, sectionPatterns=None, generalList=None):
    """Return ``edi`` with the section rules and then the general rules applied.

    Args:
        edi: Page text to rewrite.
        sectionPatterns: Mapping whose values are dicts with a compiled
            ``"start"`` pattern, a compiled ``"end"`` pattern and a
            ``"list"`` of rules.  Defaults to ``replaceSectionPatterns``.
        generalList: Rules applied to the whole text afterwards.  Defaults
            to ``replaceGeneralList``.

    A rule is a sequence ``[pattern, replacement]`` or
    ``[pattern, replacement, flags]``.

    Returns:
        The rewritten text.
    """
    if sectionPatterns is None:
        sectionPatterns = replaceSectionPatterns
    if generalList is None:
        generalList = replaceGeneralList

    def applyRules(text, rules):
        # Apply every rule in order; a missing third element means no flags.
        for rule in rules:
            text = re.sub(rule[0], rule[1], text, flags=(rule[2] if len(rule) > 2 else 0))
        return text

    # Rules that only apply inside particular sections.
    for section in sectionPatterns.values():
        sectionStart = section["start"].search(edi)
        while sectionStart:
            sectionStartPos = sectionStart.end()
            sectionEnd = section["end"].search(edi, sectionStartPos)
            # Without a following heading the section runs to the end of the
            # text.  (Using -1 here, as before, left the final character out
            # of the section rules.)
            sectionEndPos = sectionEnd.start() if sectionEnd else len(edi)
            sectionContent = applyRules(edi[sectionStartPos:sectionEndPos], section["list"])
            edi = edi[:sectionStartPos] + sectionContent + edi[sectionEndPos:]
            if sectionEnd is None or sectionEndPos <= sectionStartPos:
                break
            # The substitutions may have changed the section's length, so
            # resume from the section's new end rather than the old offset;
            # otherwise a heading right after a shortened section is skipped.
            sectionStart = section["start"].search(edi, sectionStartPos + len(sectionContent))

    # Rules that apply to the whole text.
    return applyRules(edi, generalList)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- coding: UTF-8 -*- | |
from FixPageLinkNamesFunction import FixPageLinkNames | |
# Normalise the title list the same way the main script does: drop blank
# lines (otherwise a trailing newline is written back as an empty first
# entry), remove duplicates, sort, and write the result back.
with open("FixPageLinkNamesTitle.txt", "r", encoding="utf-8") as f:
    titles = sorted({line for line in f.read().split("\n") if line})
with open("FixPageLinkNamesTitle.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(titles))

# Offline check of the rules: in.txt holds sample page text, and the
# rewritten text is saved to out.txt for inspection.
with open("in.txt", "r", encoding="utf-8") as f:
    ori = f.read()
edi = FixPageLinkNames(ori)
with open("out.txt", "w", encoding="utf-8") as f:
    f.write(edi)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
千叶繁 | |
坂泰斗 | |
小岩井小鸟 | |
朝日奈丸佳 | |
梶原岳人 | |
榊原优希 | |
神尾晋一郎 | |
羽多野涉 | |
黑木穗乃香 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment