Skip to content

Instantly share code, notes, and snippets.

@NanoSmasher
Last active May 7, 2019 00:49
Show Gist options
  • Save NanoSmasher/82b088e59aaca654b444a4c53b3bf140 to your computer and use it in GitHub Desktop.
Save NanoSmasher/82b088e59aaca654b444a4c53b3bf140 to your computer and use it in GitHub Desktop.
Program to create quote pages using the new quote format for the Azur Lane (AL) wiki.
import base64 # encryption
import configparser # reading config file
import csv
import getpass
import logging
import pathlib
import re

import mwclient
import requests
from bs4 import BeautifulSoup
from mwclient import Site
# Send all INFO-and-above log records to the file quotepages.log.
logging.basicConfig(filename='quotepages.log', level=logging.INFO)

# CSV file that contains all the ships.
# The newest version of this file can be retrieved from:
#
# https://azurlane.koumakan.jp/Special:ViewData?title=Special%3AViewData&tables=ships&fields=&where=ships.Rarity+NOT+LIKE+%27Unreleased%27&join_on=&group_by=&having=&order_by=&limit=500&offset=&format=csv
#
databasefile = "listofships2.csv"

# First-column values to skip: ships that have Chinese-only or
# Chinese-specific voice lines.
excludelist = [
    '#N/A',
    'Ping Hai',
    'Ning Hai',
    '22',
    '33',
    'Arashi',
]
def _server_section(tab, ext_tab, region):
    """Build the wikitext for one server tab (default skin plus extended lines).

    tab      element holding the default ship-lines table (must not be None)
    ext_tab  element holding the extended-lines table, or None when absent
    region   region code written into every {{Quote}} template ("CN" or "JP")
    """
    text = "===Default Skin===\n"
    text += "{{QuoteHeader}}" + quoting(tab.table.find_all('tr'), region)
    if ext_tab is None:
        return text + "|}\n"

    # Valentine lines are appended to the default-skin table.
    text += valentines(ext_tab, "Valentine's Day 2018", region, " Valentines 2018")
    text += valentines(ext_tab, "Valentine's Day 2019", region, " Valentines 2019")

    extended = quoting(ext_tab.table.find_all('tr'), region)
    if extended.startswith("|}"):
        # The extended table opens with a skin header, so quoting() already
        # emitted a table terminator. Close the default table here and drop
        # the duplicate "|}" (its newline is kept, as before).
        text += "|}\n" + extended[2:] + "|}\n"
    else:
        # Either nothing was produced, or the first rows are plain quotes with
        # no skin header. Blindly slicing two characters would have corrupted
        # the leading "{{Quote", so keep those rows in the still-open table.
        text += extended + "|}\n"
    return text


def main():
    """Create a '<ship>/Quotes' wiki page for every ship in the database.

    Reads (or first creates) settings.ini, logs into the Azur Lane wiki,
    then for each ship scrapes the existing ship page and saves the quotes
    in the new template format. Ships whose quote page already exists,
    cannot be edited, cannot be fetched, or lack default lines are skipped.
    """
    # create settings.ini file if not found, read settings.ini, and decrypt.
    # allows you to log in and save your name/password for running the script in the future
    if not pathlib.Path("settings.ini").is_file():
        logging.info("settings.ini not found. Creating new file...")
        new_settings()
    logging.info('Reading settings.ini')
    config = configparser.ConfigParser()
    config.read('settings.ini')
    defaults = config['DEFAULT']
    p = ""
    if defaults.getboolean('encrypt', False):
        p = getpass.getpass("Client information is encrpyted. Please enter password: ")

    # retrieve ships from database
    logging.info('Getting information from database...')
    database = getdb()
    logging.info('ships retrieved: %s', len(database))

    # log into AzurLane wiki
    site = mwclient.Site('azurlane.koumakan.jp')
    site.login(decode(p, defaults.get('user')), decode(p, defaults.get('pass')))

    # ship loop
    for i, ship in enumerate(database):
        print('Requesting #'+str(i)+': '+ship)
        logging.info('Requesting #%s: %s', i, ship)
        page = site.pages[ship+'/Quotes']
        if page.exists:
            logging.info('Page %s/Quotes exists. Skipping...', ship)
            continue
        if not page.can('edit'):
            logging.info('Page %s/Quotes can not be edited. Skipping...', ship)
            continue

        result = requests.get("https://azurlane.koumakan.jp/"+ship)
        # Any 4xx/5xx answer (not just 400) means there is nothing to parse.
        if not result.ok:
            logging.info('Page is not responding...')
            continue
        soup = BeautifulSoup(result.content, 'html.parser')

        # Each of the tabbers have a specific tabber title
        # (the trailing spaces in two of the titles are intentional).
        ctab = soup.find(title="Chinese Ship Lines ")
        cetab = soup.find(title="Chinese Lines Extended")
        jtab = soup.find(title="Japanese Ship Lines ")
        jetab = soup.find(title="Japanese Lines Extended")
        if ctab is None or jtab is None:
            logging.info('Default skin lines not found. Skipping..')
            continue

        pagetext = "".join([
            "<noinclude>{{ShipMainTabber}}</noinclude>\n",
            "<tabber>\n",
            "Chinese Server=\n",
            _server_section(ctab, cetab, "CN"),
            "|-|\n",
            "Japanese Server=\n",
            # Region is "JP" everywhere else; the default table used "JN".
            _server_section(jtab, jetab, "JP"),
            "</tabber>",
        ])
        page.save(pagetext, summary=u'added Quote Page', minor=False, bot=True)
        logging.info('Created %s/Quotes', ship)
def valentines(tb,v,t,x):
    """ Returns the quote found directly under a Valentine's header row
    tb being the tabber element whose table is searched
    v being the exact header text to look for
    t being the region (JP or CN)
    x being the extra string to attach to the event description
    Returns "" (and logs it) when the header or its following row is missing
    """
    header = tb.table.find("th", string=v)
    if header is None:
        logging.info("no lines for "+t+x)
        return ""
    header_row = header.find_parent("tr")
    # .next_sibling is a property and may be a whitespace text node; ask
    # explicitly for the next <tr> and hand quote() that row's <td> cells.
    line_row = header_row.find_next_sibling("tr") if header_row is not None else None
    if line_row is None:
        logging.info("no lines for "+t+x)
        return ""
    return quote(line_row.find_all('td'),t,"",x)
def quoting(rows,type):
    """ Returns one long string of quote templates built from table rows
    rows being the rows of the table to reformat (the first two are headers)
    type being the region (JP or CN)
    """
    valentine_headers = ("Valentine's Day 2018", "Valentine's Day 2019")
    pieces = []
    current_skin = ""
    for position, row in enumerate(rows):
        if position < 2: # header rows carry no quotes
            continue
        cells = row.find_all('td')
        if cells:
            # Valentine rows are produced separately, so leave them out here.
            if current_skin not in valentine_headers:
                pieces.append(quote(cells, type, current_skin, ''))
            continue
        # A row without any <td> is a single header cell naming the next skin.
        current_skin = row.th.get_text()
        if current_skin not in valentine_headers:
            pieces.append("|}\n===" + current_skin + "===\n")
            pieces.append("{{QuoteHeader|mw-collapsed}}")
    return "".join(pieces)
def quote(r,t,s,x):
    """ Builds the {{Quote}} template text for a single table row
    [r]ow, i.e. the list of <td> cells: CN clips, JP clips, event, line,
    translation, note (in that order)
    [t]ype being JP or CN or EN
    [s]kin name
    e[x]tra string to attach to the event description
    One template is produced per voice clip (at least one per row).
    """
    cn_links = r[0].find_all('a')
    jp_links = r[1].find_all('a')
    clips = max(1, len(cn_links), len(jp_links))

    # Everything below is the same for every clip of this row.
    lines = textlist( str(r[3]) )
    translations = textlist( str(r[4]) )
    event = r[2].get_text().strip()
    note = r[5].get_text().strip()
    skin = s[s.find('-')+1:].strip() # keep only the text after the first '-'

    templates = []
    for idx in range(clips):
        is_last = idx == clips - 1

        # the idx-th voice clip of each server, when there is one
        cn = fname(cn_links[idx].get('href')) if idx < len(cn_links) else ""
        jp = fname(jp_links[idx].get('href')) if idx < len(jp_links) else ""

        # every clip gets its own text line; the last clip also takes
        # whatever text lines are left over
        line = ""
        if idx < len(lines):
            line = "\n".join(lines[idx:]) if is_last else lines[idx]
        translate = ""
        if idx < len(translations):
            translate = "\n".join(translations[idx:]) if is_last else translations[idx]

        # fill in the actual quote template, one field per line
        fields = ["{{Quote", "| Region = " + t]
        if jp:
            fields.append("| VoiceClipJP = " + jp)
        if cn:
            fields.append("| VoiceClipCN = " + cn)
        fields.append("| Event = " + event + x)
        fields.append("| Line = " + line)
        fields.append("| Translation = " + translate)
        if skin:
            fields.append("| Skin = " + skin)
        if note:
            fields.append("| Note = " + note)
        fields.append("}}")
        templates.append("\n".join(fields) + "\n")
    return "".join(templates)
def textlist(text):
    """ Create a list of the text lines in an HTML fragment
    text being the HTML source as a string
    Paragraph and line-break tags (any spelling: <br>, <br/>, <br />,
    <p class=...>) separate lines; all other tags are dropped.
    Returns the non-empty lines in document order.
    """
    # \b keeps tags that merely start with "p" (e.g. <pre>) out of the match.
    text = re.sub(r'</?(?:p|br)\b[^>]*>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]*>', '', text) # remove all other tags
    return [line for line in text.split('\n') if line]
def fname(n):
    """ Return the part of n after its last '/' (all of n when there is none) """
    _, _, tail = n.rpartition('/')
    return tail
def encode(key, clear):
    """ Vigenere cipher encoder
    key being the password; an empty key returns clear unchanged
    clear being the text to obscure
    Returns the shifted text, UTF-8 encoded and then base64 encoded
    """
    if len(key) == 0:
        return clear
    span = len(key)
    # shift every character by the matching (cycled) key character, mod 256
    shifted = "".join(
        chr((ord(ch) + ord(key[pos % span])) % 256)
        for pos, ch in enumerate(clear)
    )
    return base64.b64encode(shifted.encode()).decode()
def decode(key, enc):
    """ Vigenere cipher decoder
    key being the password; an empty key returns enc unchanged
    enc being base64 text as produced by the encoder
    Returns the original clear text
    """
    if len(key) == 0:
        return enc
    shifted = base64.b64decode(enc).decode()
    span = len(key)
    # undo the shift applied by the matching (cycled) key character, mod 256
    return "".join(
        chr((256 + ord(ch) - ord(key[pos % span])) % 256)
        for pos, ch in enumerate(shifted)
    )
def new_settings():
    """ Creates settings.ini with the following information:
    Username
    Password
    Optional encryption of the above credentials
    The values are asked for interactively and written to the DEFAULT section.
    """
    config = configparser.ConfigParser()
    config['DEFAULT'] = {}
    config['DEFAULT']['debug'] = '0'
    config['DEFAULT']['encrypt'] = '0'
    config['DEFAULT']['user'] = input("Username: ")
    config['DEFAULT']['pass'] = input("Password: ")

    # keep asking until the answer is one of the accepted ones
    while True:
        a = input("Do you want to encrypt your settings.ini file? [Y/n]: ")
        if a in ['Y','y','N','n','']:
            break
        print("invalid response")

    if a in ['Y','y','']: # an empty answer means the default, yes
        p = getpass.getpass("Enter password to use: ")
        config['DEFAULT']['user'] = encode(p,config['DEFAULT']['user'])
        config['DEFAULT']['pass'] = encode(p,config['DEFAULT']['pass'])
        # configparser only accepts string values; an int raises TypeError
        config['DEFAULT']['encrypt'] = '1'

    with open('settings.ini', 'w') as cf:
        config.write(cf)
def getdb():
    """ Read the database file and return the first column of every row
    Rows whose first column is in excludelist are left out, and so are
    empty rows (blank lines in the file).
    Returns a list of strings in file order.
    NOTE(review): a header row, if the CSV has one, is returned like any
    other row - confirm whether it should be skipped.
    """
    with open(databasefile, newline='', encoding="utf8") as f:
        return [
            row[0]
            for row in csv.reader(f, delimiter=',')
            if row and row[0] not in excludelist
        ]
if __name__ == "__main__":
    # Top-level boundary: record any unexpected failure, with its traceback,
    # in the log file instead of letting it reach the console.
    try:
        main()
    except Exception:
        logging.exception("Fatal error in main loop")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment