Last active
November 25, 2017 14:27
-
-
Save christopher-dG/216e4a43618a9a68a03e9db48e30e66b to your computer and use it in GitHub Desktop.
CTB Max Combo Approximation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# We want to guess how many combo units a CTB slider tends to be worth.
# We'll be looking at all catch-specific beatmaps with an FC as their top play,
# from 2010 to the present (Nov. 2017). For each beatmap, we want to count
# the max combo of the FC (this is guaranteed to be the max combo), the number
# of regular hit objects, and the number of sliders.
# To estimate slider combo, we'll compute:
# slidercombo = ((max combo - normal objects) / sliders)
# A smarter model would account for slider length,
# slider type, slider velocity, and tick rate.
import json
import os
import re
import requests
import statistics
# Old osu! website endpoints: /b/ serves a beatmap's HTML page (scraped below
# for the top score's stats), /osu/ serves the raw .osu beatmap file.
site_url = "https://osu.ppy.sh/b/"
osu_url = "https://osu.ppy.sh/osu/"
# m=2 selects the Catch the Beat mode; the API key is read from $OSU_API_KEY
# (a missing key raises KeyError immediately at startup).
api_url = "https://osu.ppy.sh/api/get_beatmaps?k=%s&m=2" % os.environ["OSU_API_KEY"]  # noqa
# The old site renders the #1 score's stats in a plain HTML table; these
# patterns pull out its max combo and its miss count.
combo_regex = re.compile("<td><strong>Max Combo</strong></td><td>([0-9]+)</td>")  # noqa
misses_regex = re.compile("<td><strong>Misses</strong></td><td>([0-9]+)</td>")
beatmaps = {}  # beatmap_id -> {"combo": int, "objs": int, "sliders": [str]}
date = "201x-01-01 00:00:00"  # "x" is replaced by the year's last digit below
# Query the API once per year (each query returns maps ranked since Jan 1 of
# that year) and scrape each new map's page for its FC stats.
# NOTE: the original used range(7), which only reaches 2016 even though the
# stated goal is "2010 to the present (Nov. 2017)" -- range(8) includes 2017.
for year in range(8):  # 2010 - 2017
    maps = requests.get("%s&since=%s" % (api_url, date.replace("x", str(year)))).json()  # noqa
    total = len(maps)
    for n, bmap in enumerate(maps):
        print("Map %d/%d (201%d)" % (n + 1, total, year))
        # approved: 1 = ranked, 2 = approved, 4 = loved -- only statuses with
        # a stable leaderboard are worth scraping.
        if bmap["approved"] not in ["4", "2", "1"]:
            print("Not ranked")
            continue
        b_id = bmap["beatmap_id"]
        if b_id in beatmaps:
            # Yearly queries overlap, so the same map can show up twice.
            continue
        try:
            text = requests.get("%s%s" % (osu_url, b_id)).text
            website = requests.get("%s%s" % (site_url, b_id)).text
        except requests.RequestException:  # narrowed from a bare except
            print("Request error")
            continue
        if not website.startswith("\n"):
            # Some new maps are only available on the new site,
            # whose HTML I don't feel like looking at.
            print("New site")
            continue
        combo_matches = combo_regex.findall(website)
        if not combo_matches:
            print("No combo match")
            continue
        misses_matches = misses_regex.findall(website)
        if not misses_matches:
            print("No misses match")
            continue
        if misses_matches[0] != "0":
            # The top play has misses, so its combo isn't the map's max combo.
            print("Not FC")
            continue
        lines = text.split("\n")
        # Locate the [HitObjects] section; everything after it is one hit
        # object per line.
        for start, line in enumerate(lines):
            if "[HitObjects]" in line:
                break
        else:
            print("No hitobjects")
            continue
        objects = 0
        sliders = []
        for line in lines[start + 1:]:
            line = line.strip()
            if not line:
                # Blank/trailing lines aren't hit objects; the original
                # counted them, slightly inflating the object count.
                continue
            if "|" in line:  # A slider.
                sliders.append(line)
            else:  # Circles (and spinners -- we can't tell them apart here).
                objects += 1
        if not sliders:
            print("No sliders")
            continue
        # Insert only fully validated entries, so no cleanup pop is needed.
        beatmaps[b_id] = {"combo": int(combo_matches[0]),
                          "objs": objects,
                          "sliders": sliders}
# Persist the raw counts so the analysis can be re-run without re-scraping.
with open("maps.json", "w") as f:
    json.dump(beatmaps, f)
# Combo contributed per slider: whatever combo the FC carries beyond its
# plain hit objects must have come from sliders.
slidercombos = [
    (entry["combo"] - entry["objs"]) / len(entry["sliders"])
    for entry in beatmaps.values()
]
for label, stat in (("Mean", statistics.mean),
                    ("Median", statistics.median),
                    ("STD", statistics.stdev)):
    print("%s: %f" % (label, stat(slidercombos)))
# Mean: 2.442353
# Median: 2.333743
# STD: 0.414389
# What have we learned?
# This tells us that sliders tend to be short.
# The minimum combo of a slider is 2, and so we can see that most
# sliders stick to this minimum. A quick AT playthrough of a few
# CTB maps confirms this as well.
# We can also see that long, slow sliders pull the mean up a little bit,
# while there's no possibility for low outliers.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment