Last active
May 7, 2019 00:49
-
-
Save NanoSmasher/82b088e59aaca654b444a4c53b3bf140 to your computer and use it in GitHub Desktop.
Program to create quote pages using the new quote format for the Azur Lane wiki.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64  # encryption
import configparser  # reading config file
import csv
import getpass
import logging
import pathlib
import re

import mwclient
import requests
from bs4 import BeautifulSoup
from mwclient import Site
# Everything the script logs (INFO and above) goes to quotepages.log.
logging.basicConfig(level=logging.INFO, filename='quotepages.log')

# CSV file that contains all the ships; the first column of each row is read
# as the ship name. The newest version of this file can be retrieved from:
#
# https://azurlane.koumakan.jp/Special:ViewData?title=Special%3AViewData&tables=ships&fields=&where=ships.Rarity+NOT+LIKE+%27Unreleased%27&join_on=&group_by=&having=&order_by=&limit=500&offset=&format=csv
#
databasefile = "listofships2.csv"

# Ships that have Chinese-only or Chinese-specific voice lines; rows whose
# first column matches one of these entries are left out of the ship list.
excludelist = [
    '#N/A',
    'Ping Hai',
    'Ning Hai',
    '22',
    '33',
    'Arashi',
]
def _server_section(tab, extended_tab, region):
    """Build the wikitext for one server's tab of the quote page.

    tab          -- element holding the default ship-lines table
    extended_tab -- element holding the extended-lines table, or None
    region       -- region code written into every quote ("CN" or "JP")

    Returns the "Default Skin" heading, the quotes of the default table and,
    when present, the Valentine's lines and the quotes of the extended table.
    """
    text = "===Default Skin===\n"
    text += "{{QuoteHeader}}" + quoting(tab.table.find_all('tr'), region)
    if extended_tab is None:
        text += "|}\n"
        return text
    # Valentine's lines are appended to the table that is still open.
    text += valentines(extended_tab, "Valentine's Day 2018", region, " Valentines 2018")
    text += valentines(extended_tab, "Valentine's Day 2019", region, " Valentines 2019")
    text += "|}\n"
    # quoting() starts each skin section with "|}" to close the previous
    # table; that table was closed just above, so drop those two characters.
    q = quoting(extended_tab.table.find_all('tr'), region)[2:]
    if len(q) > 0:
        text += q + "|}\n"
    return text


def main():
    """Create a "<ship>/Quotes" wiki page for every ship in the database.

    Steps:
      1. Create settings.ini through new_settings() when it does not exist,
         then read it (asking for the password if the credentials are
         encrypted).
      2. Load the ship names with getdb() and log into the wiki.
      3. For each ship whose quote page does not exist yet and can be edited,
         download the ship page, convert its Chinese/Japanese line tables
         into the quote template format and save the new page.

    Ends the interpreter with exit() once every ship has been handled.
    """
    # create settings.ini file if not found, read settings.ini, and decrypt.
    # allows you to log in and save your name/password for running the script in the future
    if not pathlib.Path("settings.ini").is_file():
        logging.info("settings.ini not found. Creating new file...")
        new_settings()

    logging.info('Reading settings.ini')
    config = configparser.ConfigParser()
    config.read('settings.ini')
    settings = config['DEFAULT']

    p = ""
    if settings.getboolean('encrypt', False):
        p = getpass.getpass("Client information is encrpyted. Please enter password: ")

    # retrieve ships from database
    logging.info('Getting information from database...')
    database = getdb()
    logging.info('ships retrieved: '+str(len(database)))

    # log into AzurLane wiki
    site = mwclient.Site('azurlane.koumakan.jp')
    site.login(decode(p, settings.get('user')), decode(p, settings.get('pass')))

    # ship loop
    for i, ship in enumerate(database):
        print('Requesting #'+str(i)+': '+ship)
        logging.info('Requesting #'+str(i)+': '+ship)

        page = site.pages[ship+'/Quotes']
        if page.exists:
            logging.info('Page '+ship+'/Quotes exists. Skipping...')
            continue
        if not page.can('edit'):
            logging.info('Page '+ship+'/Quotes can not be edited. Skipping...')
            continue

        result = requests.get("https://azurlane.koumakan.jp/"+ship)
        # Any client/server error means there is nothing usable to parse.
        if result.status_code >= 400:
            logging.info('Page is not responding...')
            continue
        soup = BeautifulSoup(result.content, 'html.parser')

        # Each of the tabbers have a specific tabber title
        ctab = soup.find(title="Chinese Ship Lines ")
        cetab = soup.find(title="Chinese Lines Extended")
        jtab = soup.find(title="Japanese Ship Lines ")
        jetab = soup.find(title="Japanese Lines Extended")
        if ctab is None or jtab is None:
            logging.info('Default skin lines not found. Skipping..')
            continue

        # the start of the page
        pagetext = "<noinclude>{{ShipMainTabber}}</noinclude>\n"
        pagetext += "<tabber>\n"
        pagetext += "Chinese Server=\n"
        pagetext += _server_section(ctab, cetab, "CN")
        pagetext += "|-|\n"
        pagetext += "Japanese Server=\n"
        pagetext += _server_section(jtab, jetab, "JP")
        pagetext += "</tabber>"

        # For a dry run, write pagetext to a local file (e.g.
        # 'quote'+ship+'.txt') instead of saving it to the wiki.
        page.save(pagetext, summary=u'added Quote Page', minor=False, bot=True)
        logging.info('Created '+ship+'/Quotes')

    exit()
def valentines(tb,v,t,x):
    """ Returns the formatted quote for a Valentine's Day line, or "" if absent
    [t]a[b] element whose table holds the extended lines
    [v] exact text of the header cell to look for (e.g. "Valentine's Day 2018")
    [t]ype being JP or CN
    e[x]tra string to attach to the event description
    Only the first table row following the header row is converted.
    """
    header = tb.table.find("th", string=v)
    row = None
    if header is not None:
        # next_sibling is a property and may be a whitespace text node;
        # find_next_sibling('tr') skips text and returns the next row (or None).
        row = header.find_parent("tr").find_next_sibling("tr")
    cells = row.find_all('td') if row is not None else []
    # quote() reads six cells (two clips, event, line, translation, note).
    if len(cells) < 6:
        logging.info("no lines for "+t+x)
        return ""
    return quote(cells,t,"",x)
def quoting(rows,type):
    """ Returns a long string of quotes formatted in a template
    rows of the table to reformat (the first two rows are headers and ignored)
    type being the region (JP or CN)

    A row without <td> cells is treated as a skin header: it closes the
    current table with "|}" and opens a new collapsed section named after the
    header text. Valentine's Day headers and the rows below them are skipped
    here. Rows with neither <td> nor <th> cells are ignored.
    """
    valentine_headers = ("Valentine's Day 2018", "Valentine's Day 2019")
    parts = []
    skinname = ""
    for i, row in enumerate(rows):
        if i < 2: # ignore headers
            continue
        cells = row.find_all('td')
        if not cells: # if it's just one big header row
            if row.th is None: # empty row: nothing to name a section after
                continue
            skinname = row.th.get_text()
            if skinname not in valentine_headers:
                parts.append("|}\n==="+skinname+"===\n")
                parts.append("{{QuoteHeader|mw-collapsed}}")
            continue
        if skinname in valentine_headers:
            continue
        # Each event can have multiple voice clips; quote() splits them
        parts.append(quote(cells,type,skinname,''))
    return "".join(parts)
def quote(r,t,s,x):
    """ Creates one {{Quote}} template per voice clip from a specific row
    [r]ow, or more specifically, the list Resultset containing a bunch of <td>s
        (indexes 0-5: CN clip links, JP clip links, event, line, translation, note)
    [t]ype being JP or CN or EN
    [s]kin name; only the text after the first '-' is used
    e[x]tra string to attach to the event description
    Returns all generated templates concatenated into one string.
    """
    # Everything below depends only on the row, so parse it once up front.
    cn_links = r[0].find_all('a')
    jp_links = r[1].find_all('a')
    event = r[2].get_text().strip()
    lines = textlist( str(r[3]) )
    translations = textlist( str(r[4]) )
    note = r[5].get_text().strip()
    skin = s[s.find('-')+1:].strip()
    clips = max(1, len(cn_links), len(jp_links))

    def nth_text(texts, v):
        # if it's not the last voice clip then get the vth text,
        # otherwise grab everything from the vth text to the end
        if len(texts) < v:
            return ""
        if v != clips:
            return texts[v-1]
        return "\n".join(texts[v-1:])

    parts = []
    for v in range(1, clips+1):
        # grab the vth voice clip if it exists (a link without href yields "")
        cn = fname(cn_links[v-1].get('href') or "") if len(cn_links) >= v else ""
        jp = fname(jp_links[v-1].get('href') or "") if len(jp_links) >= v else ""
        line = nth_text(lines, v)
        translate = nth_text(translations, v)

        # now to fill in the actual quote template.
        parts.append("{{Quote"+ "\n")
        parts.append("| Region = " + t + "\n")
        if jp:
            parts.append("| VoiceClipJP = " + jp + "\n")
        if cn:
            parts.append("| VoiceClipCN = " + cn+ "\n")
        parts.append("| Event = " + event + x + "\n")
        parts.append("| Line = " + line + "\n")
        parts.append("| Translation = " + translate+ "\n")
        if skin:
            parts.append("| Skin = " + skin + "\n")
        if note:
            parts.append("| Note = " + note+ "\n")
        parts.append("}}" + "\n")
    return "".join(parts)
def textlist(text):
    """ Create a list of text that have been separated by html <p> or <br>

    text is an HTML fragment as a string. Opening/closing <p> tags (with or
    without attributes) and every spelling of the line break tag (<br>,
    <br/>, <br />) become line boundaries, all remaining tags are removed and
    empty lines are dropped.
    """
    # BeautifulSoup serialises line breaks as "<br/>", so matching only the
    # literal "<br>" would silently glue consecutive lines together.
    text = re.sub(r'</?p\b[^>]*>|<br\s*/?>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]*>', '', text) # remove all other tags
    return [line for line in text.split('\n') if line]
def fname(n):
    """ Return the part of n that follows its last '/' (all of n if there is none) """
    pieces = n.rsplit('/', 1)
    return pieces[-1]
def encode(key, clear):
    """ Vigenere cipher encoder

    Shifts every character of clear by the code point of the matching key
    character (the key repeats), modulo 256, and returns the UTF-8 bytes of
    the shifted text as a base64 string. An empty key returns clear
    unchanged.
    """
    if len(key) == 0:
        return clear
    span = len(key)
    shifted = "".join(
        chr((ord(ch) + ord(key[pos % span])) % 256)
        for pos, ch in enumerate(clear)
    )
    return base64.b64encode(shifted.encode()).decode()
def decode(key, enc):
    """ Vigenere cipher decoder

    Base64-decodes enc, reads the bytes as UTF-8 and shifts every character
    back by the code point of the matching key character (the key repeats),
    modulo 256. An empty key returns enc unchanged.
    """
    if len(key) == 0:
        return enc
    span = len(key)
    shifted = base64.b64decode(enc).decode()
    return "".join(
        chr((256 + ord(ch) - ord(key[pos % span])) % 256)
        for pos, ch in enumerate(shifted)
    )
def new_settings():
    """ Creates settings.ini with the following information:
    Username
    Password
    Optional encryption of the above credentials

    Prompts on the console for every value. The file stores the keys debug,
    encrypt, user and pass in the DEFAULT section; user and pass are passed
    through encode() when encryption is chosen.
    """
    user = input("Username: ")
    # getpass keeps the wiki password from being echoed to the terminal.
    password = getpass.getpass("Password: ")

    while True:
        a = input("Do you want to encrypt your settings.ini file? [Y/n]: ")
        if a in ['Y','y','N','n','']: break
        print("invalid response")

    encrypt = a in ['Y','y','']
    if encrypt:
        p = getpass.getpass("Enter password to use: ")
        user = encode(p, user)
        password = encode(p, password)

    config = configparser.ConfigParser()
    config['DEFAULT'] = {}
    config['DEFAULT']['debug'] = '0'
    # configparser only accepts string values; an int raises TypeError.
    config['DEFAULT']['encrypt'] = '1' if encrypt else '0'
    # '%' starts an interpolation in ConfigParser, so it is stored as '%%'
    # and comes back as a single '%' when the value is read.
    config['DEFAULT']['user'] = user.replace('%', '%%')
    config['DEFAULT']['pass'] = password.replace('%', '%%')

    with open('settings.ini', 'w') as cf:
        config.write(cf)
def getdb():
    """ Grab database file and return the ship names as a list

    Reads the CSV file named by databasefile and returns the first column of
    every row, in file order, leaving out blank rows and any name listed in
    excludelist.
    """
    with open(databasefile, newline='', encoding="utf8") as f:
        return [
            row[0]
            for row in csv.reader(f, delimiter=',')
            # csv.reader yields [] for a blank line; row[0] would fail on it
            if row and row[0] not in excludelist
        ]
# Script entry point: run main() and record any uncaught error, together with
# its traceback, in the log instead of letting it propagate.
if __name__ == "__main__":
    try:
        main()
    except Exception:
        logging.exception("Fatal error in main loop")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment