# NanoSmasher/ShipQuotes.py

## ShipQuotes.py
import base64 # encryption
import configparser # reading config file
import csv
import getpass # password prompts without echo
import logging
import pathlib
import re

import mwclient
import requests
from bs4 import BeautifulSoup
from mwclient import Site
# Everything this script logs is written to quotepages.log.
logging.basicConfig(
	filename="quotepages.log",
	level=logging.INFO,
)

# CSV file that contains all the ships.
# The newest version of this file can be retrieved from:
#
# https://azurlane.koumakan.jp/Special:ViewData?title=Special%3AViewData&tables=ships&fields=&where=ships.Rarity+NOT+LIKE+%27Unreleased%27&join_on=&group_by=&having=&order_by=&limit=500&offset=&format=csv
#
databasefile = "listofships2.csv"

# Ships that have Chinese-only or Chinese-specific voice lines;
# getdb() leaves these names out of the list it returns.
excludelist = [
	"#N/A",
	"Ping Hai",
	"Ning Hai",
	"22",
	"33",
	"Arashi",
]

def main():
	""" Create a '<ship>/Quotes' wiki page for every ship that does not have one

	Steps:
	  1. make sure settings.ini exists (new_settings() creates it), read it and,
	     when the 'encrypt' flag is set, ask for the password to decode it
	  2. load the ship names with getdb()
	  3. log into the wiki
	  4. for each ship: skip it if the Quotes page already exists or can not be
	     edited, otherwise download the ship page, convert its Chinese and
	     Japanese quote tables into wikitext and save the new page
	"""
	# create settings.ini file if not found, so the name/password can be reused on later runs
	if not pathlib.Path("settings.ini").is_file():
		logging.info("settings.ini not found. Creating new file...")
		new_settings()
	logging.info('Reading settings.ini')
	# interpolation is disabled so that a '%' inside a stored credential is read literally
	config = configparser.ConfigParser(interpolation=None)
	config.read('settings.ini')
	settings = config['DEFAULT']
	p = ""
	if settings.getboolean('encrypt',False):
		p = getpass.getpass("Client information is encrpyted. Please enter password: ")

	# retrieve ships from database
	logging.info('Getting information from database...')
	database = getdb()
	logging.info('ships retrieved: '+str(len(database)))

	# log into AzurLane wiki (decode() returns the value untouched when p is empty)
	site = mwclient.Site('azurlane.koumakan.jp')
	site.login(decode(p,settings.get('user')), decode(p,settings.get('pass')))

	# ship loop
	for i, ship in enumerate(database):

		print('Requesting #'+str(i)+': '+ship)
		logging.info('Requesting #'+str(i)+': '+ship)

		page = site.pages[ship+'/Quotes']
		if page.exists:
			logging.info('Page '+ship+'/Quotes exists. Skipping...')
			continue
		if not page.can('edit'):
			logging.info('Page '+ship+'/Quotes can not be edited. Skipping...')
			continue

		# a timeout keeps one stalled request from hanging the whole run;
		# a failed request only skips this ship instead of aborting the loop
		try:
			result = requests.get("https://azurlane.koumakan.jp/"+ship, timeout=30)
		except requests.RequestException:
			logging.warning('Request for '+ship+' failed. Skipping...', exc_info=True)
			continue
		# any 4xx/5xx answer (not just 400) means there is no usable ship page
		if not result.ok:
			logging.info('Page is not responding... (HTTP '+str(result.status_code)+')')
			continue

		soup = BeautifulSoup(result.content, 'html.parser')

		# Each of the tabbers have a specific tabber title
		# (the trailing space in the two "Ship Lines " titles is intentional)
		ctab = soup.find(title="Chinese Ship Lines ")
		cetab = soup.find(title="Chinese Lines Extended")
		jtab = soup.find(title="Japanese Ship Lines ")
		jetab = soup.find(title="Japanese Lines Extended")

		if ctab is None or jtab is None:
			logging.info('Default skin lines not found. Skipping..')
			continue

		# assemble the page: one tabber tab per server
		pagetext = "<noinclude>{{ShipMainTabber}}</noinclude>\n"
		pagetext += "<tabber>\n"
		pagetext += "Chinese Server=\n"
		pagetext += _server_section(ctab, cetab, "CN")
		pagetext += "|-|\n"
		pagetext += "Japanese Server=\n"
		pagetext += _server_section(jtab, jetab, "JP")
		pagetext += "</tabber>"

		page.save(pagetext, summary=u'added Quote Page', minor=False, bot=True)

		logging.info('Created '+ship+'/Quotes')

def _server_section(tab, ext_tab, region):
	""" Returns the wikitext of one server tab

	tab is the element holding the default quote table
	ext_tab is the element holding the extended quote table (may be None)
	region is the value written to every quote's Region field (CN or JP)
	"""
	text = "===Default Skin===\n"
	text += "{{QuoteHeader}}" + quoting(tab.table.find_all('tr'),region)
	if ext_tab:
		# Valentine's lines are appended to the default skin table with a suffixed event name
		for year in ("2018", "2019"):
			text += valentines(ext_tab,"Valentine's Day "+year,region," Valentines "+year)
		text += "|}\n"
		# quoting() opens every skin section with "|}" to close the previous table;
		# that table was just closed above, so the first two characters are dropped
		q = quoting(ext_tab.table.find_all('tr'),region)[2:]
		if len(q) > 0:
			text += q+"|}\n"
	else:
		text += "|}\n"
	return text

def valentines(tb,v,t,x):
	""" Returns the quote(s) found in the row right after a Valentine's header

	tb is the element whose table is searched
	v is the exact text of the <th> header to look for
	t is the region (JP or CN) passed on to quote()
	x is the string quote() appends to the event description

	Returns "" (and logs it) when the header or the row after it is missing.
	"""
	headers = tb.table.find_all("th",string=v)
	if headers:
		# next_sibling is a property and may be a whitespace text node, so ask
		# explicitly for the next <tr> and hand its <td> cells to quote(),
		# the same shape quoting() passes
		row = headers[0].find_parent().find_next_sibling("tr")
		cells = row.find_all("td") if row is not None else []
		if cells:
			return quote(cells,t,"",x)
	logging.info("no lines for "+t+x)
	return ""

def quoting(rows,type):
	""" Builds the wikitext for all quotes of one table

	rows are the table rows to convert; the first two are headers and are ignored
	type is the region (JP or CN) passed on to quote()

	A row without <td> cells is a skin header: it closes the running table
	and opens a new collapsed one. Valentine's Day 2018/2019 headers and the
	rows below them produce no output here.
	"""
	valentine_headers = ("Valentine's Day 2018", "Valentine's Day 2019")
	pieces = []
	current_skin = ""
	for position, row in enumerate(rows):
		if position < 2: # ignore headers
			continue
		cells = row.find_all('td')
		if not cells:
			# one big header row: remember the skin it introduces
			current_skin = row.th.get_text()
			if current_skin not in valentine_headers:
				pieces.append("|}\n===" + current_skin + "===\n")
				pieces.append("{{QuoteHeader|mw-collapsed}}")
		elif current_skin not in valentine_headers:
			# a row may expand into several quotes, one per voice clip
			pieces.append(quote(cells, type, current_skin, ''))
	return "".join(pieces)

def quote(r,t,s,x):
	""" Turns one table row into one {{Quote}} template per voice clip

	[r]ow: the list of <td> cells; indexes 0 and 1 hold the CN and JP clip
	       links, 2 the event, 3 the line, 4 the translation, 5 the note
	[t]ype: written to the Region field (JP or CN or EN)
	[s]kin name: only the part after the first '-' is written to the Skin field
	e[x]tra string appended to the event description
	"""
	cn_links = r[0].find_all('a')
	jp_links = r[1].find_all('a')
	# at least one quote is produced even when the row has no clip at all
	clips = max(1, len(cn_links), len(jp_links))

	spoken = textlist( str(r[3]) )
	translated = textlist( str(r[4]) )
	event = r[2].get_text().strip()
	note = r[5].get_text().strip()
	skin = s[s.find('-')+1:].strip()

	def pick_clip(links, v):
		# file name of the vth voice clip, or "" when there is none
		if len(links) < v:
			return ""
		return fname(links[v-1].get('href'))

	def pick_text(chunks, v):
		# the vth text; the last clip takes every remaining text
		if len(chunks) < v:
			return ""
		if v == clips:
			return "\n".join(chunks[v-1:])
		return chunks[v-1]

	blocks = []
	for v in range(1, clips+1):
		cn = pick_clip(cn_links, v)
		jp = pick_clip(jp_links, v)

		# fill in the quote template; optional fields are left out when empty
		fields = ["{{Quote", "| Region = " + t]
		if jp:
			fields.append("| VoiceClipJP = " + jp)
		if cn:
			fields.append("| VoiceClipCN = " + cn)
		fields.append("| Event = " + event + x)
		fields.append("| Line = " + pick_text(spoken, v))
		fields.append("| Translation = " + pick_text(translated, v))
		if skin:
			fields.append("| Skin = " + skin)
		if note:
			fields.append("| Note = " + note)
		fields.append("}}")
		blocks.append("\n".join(fields) + "\n")
	return "".join(blocks)

def textlist(text):
	""" Create a list of the texts that were separated by html <p> or <br>

	text is an html fragment as a string
	Returns the non-blank lines, stripped of surrounding whitespace, in order.
	"""
	# <br> is matched in all its spellings: Beautiful Soup serialises it as <br/>,
	# which a plain '<br>' replace would miss and so glue two lines together.
	# <p ...> is matched with or without attributes.
	text = re.sub(r'<br\s*/?>|</?p(?:\s[^>]*)?>', '\n', text, flags=re.IGNORECASE)
	text = re.sub(r'<[^>]*>', '', text) # remove all other tags
	# whitespace-only lines are dropped too, so they can not shift the
	# clip-to-line pairing done by the caller
	return [piece.strip() for piece in text.split('\n') if piece.strip()]

def fname(n):
	""" Return the part of n that follows its last '/' (all of n if it has none) """
	pieces = n.rsplit('/', 1)
	return pieces[-1]

def encode(key, clear):
	""" Vigenere cipher encoder

	Each character of clear is shifted by the matching character of key
	(key repeats as needed), modulo 256; the result is returned as the
	base64 text of its UTF-8 encoding. An empty key returns clear unchanged.
	"""
	if len(key) == 0:
		return clear
	width = len(key)
	shifted = [
		chr((ord(ch) + ord(key[pos % width])) % 256)
		for pos, ch in enumerate(clear)
	]
	return base64.b64encode("".join(shifted).encode()).decode()

def decode(key, enc):
	""" Vigenere cipher decoder

	Reverses encode(): enc is base64-decoded, read as UTF-8, and every
	character is shifted back by the matching character of key (key repeats
	as needed), modulo 256. An empty key returns enc unchanged.
	"""
	if len(key) == 0:
		return enc
	width = len(key)
	shifted = base64.b64decode(enc).decode()
	restored = [
		chr((256 + ord(ch) - ord(key[pos % width])) % 256)
		for pos, ch in enumerate(shifted)
	]
	return "".join(restored)

def new_settings():
	""" Creates settings.ini with the following information:

	Username
	Password
	Optional Encrpytion of the above credentials

	The file gets a single DEFAULT section with the keys debug, encrypt,
	user and pass. When encryption is chosen, user and pass are stored as
	encode(password, value) and encrypt is set to '1'.
	"""
	user = input("Username: ")
	# getpass keeps the wiki password from being echoed on the terminal
	password = getpass.getpass("Password: ")
	while True:
		a = input("Do you want to encrypt your settings.ini file? [Y/n]: ")
		if a in ['Y','y','N','n','']: break
		print("invalid response")
	encrypt = a in ['Y','y',''] # plain Enter means yes
	if encrypt:
		p = getpass.getpass("Enter password to use: ")
		user = encode(p,user)
		password = encode(p,password)

	# interpolation is disabled so a '%' in a credential is stored as-is
	# instead of raising ValueError
	config = configparser.ConfigParser(interpolation=None)
	# configparser only accepts string values, hence '1'/'0' rather than 1/0
	config['DEFAULT'] = {
		'debug': '0',
		'encrypt': '1' if encrypt else '0',
		'user': user,
		'pass': password,
	}
	with open('settings.ini', 'w') as cf:
		config.write(cf)

def getdb(path=None, exclude=None):
	""" Read the ship database file and return the list of ship names

	path is the csv file to read; defaults to the module-level databasefile
	exclude holds the names to leave out; defaults to the module-level excludelist

	Returns the first column of every non-empty row, in file order,
	without the excluded names.
	"""
	if path is None:
		path = databasefile
	if exclude is None:
		exclude = excludelist
	d = []
	with open(path, newline='', encoding="utf8") as f:
		for c in csv.reader(f, delimiter=','):
			# csv.reader yields [] for a blank line; c[0] would raise IndexError
			if not c or c[0] in exclude:
				continue
			d.append(c[0])
	return d

# Script entry point: run main() only when the file is executed directly.
if __name__ == "__main__":
	try:
		main()
	except Exception:
		# top-level boundary: write the traceback to the log file, then
		# exit with a non-zero status so the failure is visible to the caller
		logging.error("Fatal error in main loop", exc_info=True)
		raise SystemExit(1)