Created
June 12, 2022 03:41
-
-
Save beer-psi/0eb797c691c45ebca5b8f6857132871b to your computer and use it in GitHub Desktop.
Scrapes Phigros chart constants from wikiwiki.jp/phigros
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
[tool.poetry.dependencies] | |
python = "^3.8" | |
beautifulsoup4 = "^4.11.1" | |
requests = "^2.28.0" | |
XlsxWriter = "^3.0.3" | |
""" | |
from datetime import datetime | |
from bs4 import BeautifulSoup | |
import requests | |
import xlsxwriter | |
class PhigrosSong:
    """One Phigros chart: a song at a particular difficulty.

    ``title``, ``difficulty`` and ``constant`` are mandatory.  The optional
    metadata (``artist``, ``notecount``, ``bpm``, ``length``, ``version``) is
    taken from keyword arguments and is ``None`` when not supplied; any other
    keyword argument is ignored.
    """

    # Names of the optional metadata attributes read from **kwargs.
    _OPTIONAL_FIELDS = ("artist", "notecount", "bpm", "length", "version")

    # Hex colour associated with each recognised difficulty label.
    _DIFFICULTY_COLORS = {
        "AT": "#cd0000",
        "IN": "#674ea7",
        "Legacy": "#674ea7",
        "HD": "#e69138",
        "EZ": "#008000",
        "SP": "#808080",
    }

    def __init__(self, title: str, difficulty: str, constant: float, **kwargs):
        self.title = title
        self.difficulty = difficulty
        self.constant = constant
        for field_name in self._OPTIONAL_FIELDS:
            setattr(self, field_name, kwargs.get(field_name))

    def __repr__(self):
        return (
            f"PhigrosSong(title={self.title}, "
            f"difficulty={self.difficulty}, "
            f"constant={self.constant})"
        )

    # The string form and the debug representation are the same text.
    __str__ = __repr__

    @property
    def difficulty_color(self):
        """Return the hex colour for this chart's difficulty.

        Yields ``None`` when the difficulty label is not one of the
        recognised ones.
        """
        return self._DIFFICULTY_COLORS.get(self.difficulty)
def scrape_song_list(
    variant: str = "high", timeout: float = 30.0
) -> "list[PhigrosSong]":
    """Download one chart-constant page from wikiwiki.jp and parse its songs.

    Args:
        variant: ``"low"`` or ``"medium"`` select the corresponding page; any
            other value (including the default ``"high"``) selects the
            high-level page.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The parsed songs, in the reverse of the order in which they appear
        on the page.

    Raises:
        Exception: If the server answers with a non-OK status.
        requests.exceptions.RequestException: On connection failure or
            timeout (raised by ``requests`` itself).
    """
    if variant == "low":
        url = r"https://wikiwiki.jp/phigros/%E4%BD%8E%E3%83%AC%E3%83%99%E3%83%AB%E8%AD%9C%E9%9D%A2%E5%AE%9A%E6%95%B0%E8%A1%A8"
    elif variant == "medium":
        url = r"https://wikiwiki.jp/phigros/%E4%B8%AD%E3%83%AC%E3%83%99%E3%83%AB%E8%AD%9C%E9%9D%A2%E5%AE%9A%E6%95%B0%E8%A1%A8"
    else:
        url = r"https://wikiwiki.jp/phigros/%E9%AB%98%E3%83%AC%E3%83%99%E3%83%AB%E8%AD%9C%E9%9D%A2%E5%AE%9A%E6%95%B0%28CC%29%E8%A1%A8"

    with requests.get(url, timeout=timeout) as resp:
        if not resp.ok:
            raise Exception("Could not download webpage")
        soup = BeautifulSoup(resp.text, "html.parser")

    songs = []
    # Headings and tables are paired positionally: the n-th matching <h4>
    # supplies the constant for every row of the n-th <table>.
    headings = soup.find_all("h4", "heading-after-first")
    tables = soup.find_all("table")
    for const_elem, table in zip(headings, tables):
        constant = float(next(const_elem.stripped_strings))
        # The first row of each table is skipped (treated as the header).
        for row in table.find_all("tr")[1:]:
            cells = row.find_all("td")
            if not cells:
                # A row without any <td> carries no song data; indexing it
                # below would raise IndexError.
                continue
            if cells[0].get("colspan") == "6":
                # Rows whose first cell spans six columns are not song rows.
                continue
            # NOTE(review): cells[5] is never read -- possibly the song
            # length column, which would explain PhigrosSong.length staying
            # None; confirm against the live page layout.
            songs.append(
                PhigrosSong(
                    cells[0].get_text(),
                    cells[1].get_text(),
                    constant,
                    artist=cells[2].get_text(),
                    notecount=cells[3].get_text(),
                    bpm=cells[4].get_text(),
                    version=cells[6].get_text(),
                )
            )
    songs.reverse()
    return songs
def construct_workbook(songs: "list[PhigrosSong]", output: str = "result.xlsx"):
    """Write an Excel workbook with a score-entry sheet and a results sheet.

    The "SCORES" sheet lists every chart with an empty accuracy column and a
    per-row play-rating formula.  The "RESULTS" sheet pulls the 19 largest
    play ratings from "SCORES" and derives summary figures from them.

    Args:
        songs: Charts to list, in the order they should appear.  ``title``
            and ``difficulty`` are expected to be strings.
        output: Path of the ``.xlsx`` file to create.
    """
    workbook = xlsxwriter.Workbook(output)
    _write_scores_sheet(workbook, songs)
    _write_results_sheet(workbook)
    workbook.close()


def _write_scores_sheet(workbook, songs):
    """Add the "SCORES" sheet: header row, one row per chart, rating formula."""
    worksheet = workbook.add_worksheet("SCORES")

    bold = workbook.add_format({"bold": True})
    # Border style 5 is XlsxWriter's "thick" line, style 1 is "thin".
    bold_underlined = workbook.add_format({"bold": True, "bottom": 5})
    header_columns = (
        ("A", "Title", 30, bold_underlined),
        ("B", "DIFF", 7, bold_underlined),
        ("C", "CC", 4.2, bold_underlined),
        ("D", "ACC (%/100)", 14, bold),
        ("E", "PLAY RATING", 14, bold),
    )
    for column, label, width, header_format in header_columns:
        worksheet.write_string(f"{column}1", label, header_format)
        worksheet.set_column(f"{column}:{column}", width)

    # Only a handful of distinct cell formats exist, so build each one once
    # and reuse it rather than creating three Format objects per chart.
    format_cache = {}

    def shared_format(properties):
        key = frozenset(properties.items())
        if key not in format_cache:
            format_cache[key] = workbook.add_format(properties)
        return format_cache[key]

    cell_colors = ["#d9d2e9", "#cfe2f3"]
    cell_style = 0
    prev_constant = 0.0
    last_idx = len(songs) - 1
    for idx, song in enumerate(songs):
        row = idx + 2  # Row 1 holds the headers.
        # Flip the background colour whenever the constant changes so that
        # runs of equal constants form visually distinct bands.
        if song.constant != prev_constant:
            prev_constant = song.constant
            cell_style = 1 - cell_style
        background = cell_colors[cell_style]
        # The final data row gets a thick bottom border to close the table.
        bottom = 5 if idx == last_idx else 1

        # Title and difficulty are scraped text.  write_string() stores them
        # verbatim, whereas the generic write() would turn text starting
        # with "=" into a formula and URL-like text into a hyperlink.
        worksheet.write_string(
            f"A{row}",
            song.title,
            shared_format(
                {"bg_color": background, "bottom": bottom, "right": 1, "left": 1}
            ),
        )
        worksheet.write_string(
            f"B{row}",
            song.difficulty,
            shared_format(
                {
                    "bold": True,
                    "font_color": song.difficulty_color,
                    "bg_color": background,
                    "bottom": bottom,
                    "right": 1,
                    "left": 1,
                }
            ),
        )
        worksheet.write(
            f"C{row}",
            song.constant,
            shared_format(
                {"bg_color": background, "right": 5, "bottom": bottom, "left": 1}
            ),
        )
        # Column D is left blank for the user's accuracy (as a fraction).
        # The rating is 0 below 0.7 accuracy.
        worksheet.write_formula(
            f"E{row}",
            f"=IF(D{row}<0.7,0,C{row}*(((100*D{row})-55)/45)^2)",
        )


def _write_results_sheet(workbook):
    """Add the "RESULTS" sheet: the 19 best ratings plus summary rows."""
    worksheet = workbook.add_worksheet("RESULTS")
    worksheet.merge_range(
        "A1:E1", "BEST 19 RESULTS", workbook.add_format({"align": "center"})
    )  # type: ignore

    header_properties = {
        "bold": True,
        "top": 5,
        "bottom": 1,
        "left": 1,
        "bg_color": "#c9daf8",
    }
    header_style = workbook.add_format(header_properties)
    # The last header cell additionally closes the table on the right.
    last_header_style = workbook.add_format({**header_properties, "right": 5})
    header_columns = (
        ("A", "Title", 30, header_style),
        ("B", "DIFF", 7, header_style),
        ("C", "CC", 4.2, header_style),
        ("D", "ACC (%/100)", 14, header_style),
        ("E", "PLAY RATING", 14, last_header_style),
    )
    for column, label, width, header_format in header_columns:
        worksheet.write_string(f"{column}2", label, header_format)
        worksheet.set_column(f"{column}:{column}", width)

    # Rows 3..21 hold the 19 best ratings; rows 3..7 are highlighted.
    for row in range(3, 22):
        row_properties = {
            "bg_color": "#d9d2e9" if 3 <= row <= 7 else "white",
            "top": 1,
            "bottom": 1 if row < 21 else 5,
            "left": 1,
        }
        lookup_format = workbook.add_format({**row_properties, "right": 1})
        rating_format = workbook.add_format({**row_properties, "right": 5})

        # Columns A-D look up SCORES columns 1-4 of the row whose rating
        # matches this row's rating in column E.
        for scores_column, column in enumerate("ABCD", start=1):
            worksheet.write_formula(
                f"{column}{row}",
                f'=IF(E{row}=0," ",INDEX(SCORES!$A:$E,MATCH(E{row},SCORES!$E:$E,0),{scores_column}))',
                lookup_format,
            )
        # ROW()-2 maps sheet rows 3..21 to ranks 1..19.
        worksheet.write_formula(
            f"E{row}",
            f"=LARGE(SCORES!$E:$E,ROW(E{row})-2)",
            rating_format,
        )

    # Row 23: count of listed ratings at or above the threshold in D23.
    threshold_properties = {"top": 5, "bottom": 1, "right": 1, "left": 1}
    worksheet.merge_range(
        "A23:C23",
        "Ratings above",
        workbook.add_format(threshold_properties),
    )  # type: ignore
    worksheet.write("D23", 15, workbook.add_format(threshold_properties))
    worksheet.write_formula(
        "E23",
        '=COUNTIF(E2:E21, ">="&D23)',
        workbook.add_format({**threshold_properties, "right": 5}),
    )

    # Row 24: E3 is passed twice, so the average spans 20 values with the
    # single best rating counted double.
    score_properties = {
        "top": 1,
        "bottom": 5,
        "left": 1,
        "right": 1,
        "bg_color": "yellow",
    }
    worksheet.merge_range(
        "A24:D24",
        "Ranking Score",
        workbook.add_format(score_properties),
    )  # type: ignore
    worksheet.write_formula(
        "E24",
        "=AVERAGE(E3, E3:E21)",
        workbook.add_format({**score_properties, "right": 5}),
    )
def main():
    """Scrape the high, medium and low pages and write a dated workbook."""
    songs = []
    for variant in ("high", "medium", "low"):
        print(f"Getting {variant}-level song list...")
        songs += scrape_song_list(variant)

    print("Creating result workbook...")
    # The output file name carries today's date, e.g. "Phigros RKS 2022-06-12.xlsx".
    today = f"{datetime.now():%Y-%m-%d}"
    construct_workbook(songs, f"Phigros RKS {today}.xlsx")
    print("Done.")
# Run the scraper only when executed as a script.  SystemExit is raised
# directly because the bare exit() helper is injected by the `site` module
# for interactive use and is not guaranteed to exist in every interpreter.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment