Skip to content

Instantly share code, notes, and snippets.

@beerpiss
Created June 12, 2022 03:41
Show Gist options
  • Save beerpiss/0eb797c691c45ebca5b8f6857132871b to your computer and use it in GitHub Desktop.
Save beerpiss/0eb797c691c45ebca5b8f6857132871b to your computer and use it in GitHub Desktop.
Scrapes Phigros chart constants from wikiwiki.jp/phigros
"""
[tool.poetry.dependencies]
python = "^3.8"
beautifulsoup4 = "^4.11.1"
requests = "^2.28.0"
XlsxWriter = "^3.0.3"
"""
from datetime import datetime
from bs4 import BeautifulSoup
import requests
import xlsxwriter
class PhigrosSong:
    """One chart entry: a song title at a given difficulty with its constant.

    ``title``, ``difficulty`` and ``constant`` are required. ``artist``,
    ``notecount``, ``bpm``, ``length`` and ``version`` may be supplied as
    keyword arguments and are stored as ``None`` when omitted; any other
    keyword argument is ignored.
    """

    # Keyword arguments that are copied onto the instance.
    _OPTIONAL_FIELDS = ("artist", "notecount", "bpm", "length", "version")

    # (difficulty labels, hex font colour) pairs, checked in order.
    _PALETTE = (
        (("AT",), "#cd0000"),
        (("IN", "Legacy"), "#674ea7"),
        (("HD",), "#e69138"),
        (("EZ",), "#008000"),
        (("SP",), "#808080"),
    )

    def __init__(self, title: str, difficulty: str, constant: float, **kwargs):
        self.title = title
        self.difficulty = difficulty
        self.constant = constant
        for field in self._OPTIONAL_FIELDS:
            setattr(self, field, kwargs.get(field))

    def __repr__(self):
        return "PhigrosSong(title={}, difficulty={}, constant={})".format(
            self.title, self.difficulty, self.constant
        )

    # The string form is the same text as the repr.
    __str__ = __repr__

    @property
    def difficulty_color(self):
        """Hex colour for this chart's difficulty label, or ``None`` if unknown."""
        for labels, color in self._PALETTE:
            if self.difficulty in labels:
                return color
        return None
def scrape_song_list(variant: str = "high") -> "list[PhigrosSong]":
    """Scrape one chart-constant page of wikiwiki.jp/phigros.

    Args:
        variant: ``"low"`` or ``"medium"`` select the matching page; any
            other value (including the default ``"high"``) selects the
            high-level page.

    Returns:
        One ``PhigrosSong`` per parsed table row, in the reverse of the
        order the rows appear on the page.

    Raises:
        RuntimeError: the server answered with a non-success status.
        requests.exceptions.RequestException: the request itself failed,
            including when the timeout elapses.
    """
    if variant == "low":
        url = r"https://wikiwiki.jp/phigros/%E4%BD%8E%E3%83%AC%E3%83%99%E3%83%AB%E8%AD%9C%E9%9D%A2%E5%AE%9A%E6%95%B0%E8%A1%A8"
    elif variant == "medium":
        url = r"https://wikiwiki.jp/phigros/%E4%B8%AD%E3%83%AC%E3%83%99%E3%83%AB%E8%AD%9C%E9%9D%A2%E5%AE%9A%E6%95%B0%E8%A1%A8"
    else:
        url = r"https://wikiwiki.jp/phigros/%E9%AB%98%E3%83%AC%E3%83%99%E3%83%AB%E8%AD%9C%E9%9D%A2%E5%AE%9A%E6%95%B0%28CC%29%E8%A1%A8"

    # Without a timeout requests waits indefinitely on an unresponsive server.
    with requests.get(url, timeout=30) as resp:
        if not resp.ok:
            raise RuntimeError(
                f"Could not download webpage (HTTP {resp.status_code}): {url}"
            )
        soup = BeautifulSoup(resp.text, "html.parser")

    songs = []
    # Headings and tables are paired purely by document order: the n-th
    # "heading-after-first" <h4> supplies the constant for the n-th <table>.
    headings = soup.find_all("h4", "heading-after-first")
    tables = soup.find_all("table")
    for const_elem, table in zip(headings, tables):
        constant = float(next(const_elem.stripped_strings))
        # The first <tr> of every table is skipped (treated as the header).
        for row in table.find_all("tr")[1:]:
            cells = row.find_all("td")
            # Rows without any <td> cannot describe a chart; rows whose first
            # cell has colspan="6" are skipped as well.
            if not cells or cells[0].get("colspan") == "6":
                continue
            # NOTE(review): cells[5] is never read and PhigrosSong.length is
            # left as None -- presumably that column is the song length;
            # confirm against the page layout before wiring it up.
            songs.append(
                PhigrosSong(
                    cells[0].get_text(),
                    cells[1].get_text(),
                    constant,
                    artist=cells[2].get_text(),
                    notecount=cells[3].get_text(),
                    bpm=cells[4].get_text(),
                    version=cells[6].get_text(),
                )
            )
    songs.reverse()
    return songs
def _cached_format(workbook, cache, **props):
    """Return the workbook format for ``props``, creating it on first request.

    ``cache`` is a plain dict owned by the caller, keyed by the sorted
    property items, so identical cell styles share one format object instead
    of allocating a new one per cell.
    """
    key = tuple(sorted(props.items()))
    if key not in cache:
        cache[key] = workbook.add_format(props)
    return cache[key]


def _write_scores_sheet(workbook, songs):
    """Add the "SCORES" sheet: one row per chart plus its play-rating formula."""
    worksheet = workbook.add_worksheet("SCORES")
    bold = workbook.add_format({"bold": True})
    ruled_header = workbook.add_format({"bold": True, "bottom": 5})

    # (column letter, header text, column width, header format)
    columns = (
        ("A", "Title", 30, ruled_header),
        ("B", "DIFF", 7, ruled_header),
        ("C", "CC", 4.2, ruled_header),
        ("D", "ACC (%/100)", 14, bold),
        ("E", "PLAY RATING", 14, bold),
    )
    for letter, label, width, fmt in columns:
        worksheet.write(f"{letter}1", label, fmt)
        worksheet.set_column(f"{letter}:{letter}", width)

    band_colors = ["#d9d2e9", "#cfe2f3"]
    band = 0
    prev_constant = 0.0
    formats = {}
    last_idx = len(songs) - 1
    for idx, song in enumerate(songs):
        row = idx + 2  # row 1 holds the headers
        # Alternate the background every time the chart constant changes so
        # charts sharing a constant form one visual band.
        if song.constant != prev_constant:
            prev_constant = song.constant
            band = 1 - band
        bg_color = band_colors[band]
        # The final data row is closed off with border style 5.
        bottom = 5 if idx == last_idx else 1

        worksheet.write(
            f"A{row}",
            song.title,
            _cached_format(
                workbook, formats, bg_color=bg_color, bottom=bottom, right=1, left=1
            ),
        )
        worksheet.write(
            f"B{row}",
            song.difficulty,
            _cached_format(
                workbook,
                formats,
                bold=True,
                font_color=song.difficulty_color,
                bg_color=bg_color,
                bottom=bottom,
                right=1,
                left=1,
            ),
        )
        worksheet.write(
            f"C{row}",
            song.constant,
            _cached_format(
                workbook, formats, bg_color=bg_color, right=5, bottom=bottom, left=1
            ),
        )
        # Column D is left empty for the player's accuracy (as a fraction);
        # below 0.7 the rating is 0, otherwise CC * ((100*acc - 55) / 45)^2.
        worksheet.write(
            f"E{row}",
            f"=IF(D{row}<0.7,0,C{row}*(((100*D{row})-55)/45)^2)",
        )


def _write_results_sheet(workbook):
    """Add the "RESULTS" sheet: the 19 highest play ratings and their summary."""
    first_row, last_row = 3, 21  # 19 result rows below the title and header
    highlight_until = 7  # rows 3-7 get the tinted background

    worksheet = workbook.add_worksheet("RESULTS")
    worksheet.merge_range(
        "A1:E1", "BEST 19 RESULTS", workbook.add_format({"align": "center"})
    )  # type: ignore

    header_props = {
        "bold": True,
        "top": 5,
        "bottom": 1,
        "left": 1,
        "bg_color": "#c9daf8",
    }
    header_style = workbook.add_format(header_props)
    # The right-most header additionally carries the outer right border.
    header_style_edge = workbook.add_format({**header_props, "right": 5})

    # (column letter, header text, column width, header format)
    columns = (
        ("A", "Title", 30, header_style),
        ("B", "DIFF", 7, header_style),
        ("C", "CC", 4.2, header_style),
        ("D", "ACC (%/100)", 14, header_style),
        ("E", "PLAY RATING", 14, header_style_edge),
    )
    for letter, label, width, fmt in columns:
        worksheet.write(f"{letter}2", label, fmt)
        worksheet.set_column(f"{letter}:{letter}", width)

    formats = {}
    for row in range(first_row, last_row + 1):
        bg_color = "#d9d2e9" if first_row <= row <= highlight_until else "white"
        bottom = 1 if row < last_row else 5
        inner = _cached_format(
            workbook, formats, bg_color=bg_color, top=1, bottom=bottom, left=1, right=1
        )
        edge = _cached_format(
            workbook, formats, bg_color=bg_color, top=1, bottom=bottom, left=1, right=5
        )
        # Columns A-D look up the SCORES row whose play rating equals this
        # row's rating and pull SCORES column 1-4 from it.
        # NOTE(review): MATCH(..., 0) returns the first hit, so charts with an
        # identical play rating resolve to the same SCORES row.
        for score_column, letter in enumerate("ABCD", start=1):
            worksheet.write(
                f"{letter}{row}",
                f'=IF(E{row}=0," ",INDEX(SCORES!$A:$E,MATCH(E{row},SCORES!$E:$E,0),{score_column}))',
                inner,
            )
        # ROW()-2 turns sheet rows 3..21 into ranks 1..19.
        worksheet.write(
            f"E{row}",
            f"=LARGE(SCORES!$E:$E,ROW(E{row})-2)",
            edge,
        )

    threshold_inner = workbook.add_format(
        {"top": 5, "bottom": 1, "right": 1, "left": 1}
    )
    threshold_edge = workbook.add_format(
        {"top": 5, "bottom": 1, "right": 5, "left": 1}
    )
    worksheet.merge_range("A23:C23", "Ratings above", threshold_inner)  # type: ignore
    worksheet.write("D23", 15, threshold_inner)
    # Count over the result rows only; the range previously started at the
    # header cell E2.
    worksheet.write(
        "E23",
        f'=COUNTIF(E{first_row}:E{last_row}, ">="&D23)',
        threshold_edge,
    )

    total_inner = workbook.add_format(
        {"top": 1, "bottom": 5, "left": 1, "right": 1, "bg_color": "yellow"}
    )
    total_edge = workbook.add_format(
        {"top": 1, "bottom": 5, "left": 1, "right": 5, "bg_color": "yellow"}
    )
    worksheet.merge_range("A24:D24", "Ranking Score", total_inner)  # type: ignore
    # The best result (E3) is deliberately averaged in twice: once on its own
    # and once as part of E3:E21, giving a mean over 20 values.
    worksheet.write("E24", "=AVERAGE(E3, E3:E21)", total_edge)


def construct_workbook(songs: "list[PhigrosSong]", output: str = "result.xlsx"):
    """Write the rating workbook for ``songs`` to ``output``.

    The workbook has two sheets: "SCORES" lists every chart with an empty
    accuracy column and a play-rating formula, and "RESULTS" shows the 19
    highest play ratings together with a threshold count and the ranking
    score.

    Args:
        songs: charts to list, in the order they should appear on "SCORES".
        output: path of the ``.xlsx`` file to create.
    """
    workbook = xlsxwriter.Workbook(output)
    _write_scores_sheet(workbook, songs)
    _write_results_sheet(workbook)
    workbook.close()
def main():
    """Scrape the high, medium and low tables and write today's workbook."""
    # (progress message, positional arguments for scrape_song_list)
    stages = (
        ("Getting high-level song list...", ()),
        ("Getting medium-level song list...", ("medium",)),
        ("Getting low-level song list...", ("low",)),
    )
    songs = []
    for message, args in stages:
        print(message)
        songs.extend(scrape_song_list(*args))

    print("Creating result workbook...")
    stamp = datetime.now().strftime("%Y-%m-%d")
    construct_workbook(songs, f"Phigros RKS {stamp}.xlsx")
    print("Done.")
if __name__ == "__main__":
    # The bare `exit` helper is installed by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds; raising
    # SystemExit has the same effect and is always available.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment