@christopher-dG
Last active November 25, 2017 14:27
CTB Max Combo Approximation
#!/usr/bin/env python3
# We want to estimate how many combo units a CTB slider tends to be worth.
# We'll look at all catch-specific beatmaps whose top play is an FC,
# from 2010 to the present (Nov. 2017). For each beatmap, we record the
# combo of the FC (for an FC, this equals the map's max combo), the number
# of regular hit objects, and the number of sliders.
# To estimate slider combo, we compute:
# slidercombo = (max combo - normal objects) / sliders
# A smarter model would account for slider length,
# slider type, slider velocity, and tick rate.
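# For example (made-up numbers): a map whose FC reaches 700 combo, with
# 450 non-slider hit objects and 100 sliders, gives
# slidercombo = (700 - 450) / 100 = 2.5 combo units per slider.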
import json
import os
import re
import requests
import statistics
site_url = "https://osu.ppy.sh/b/"
osu_url = "https://osu.ppy.sh/osu/"
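# m=2 in the API URL below restricts results to osu!catch (CTB) difficulties.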
api_url = "https://osu.ppy.sh/api/get_beatmaps?k=%s&m=2" % os.environ["OSU_API_KEY"] # noqa
combo_regex = re.compile("<td><strong>Max Combo</strong></td><td>([0-9]+)</td>") # noqa
misses_regex = re.compile("<td><strong>Misses</strong></td><td>([0-9]+)</td>")
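# These patterns match rows in the old beatmap page's HTML, e.g. (illustrative value)
# "<td><strong>Max Combo</strong></td><td>1057</td>" captures "1057".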
beatmaps = {}
date = "201x-01-01 00:00:00"
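# The "x" is replaced with the loop counter below, giving since dates of
# 2010-01-01 through 2016-01-01.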
for year in range(7):  # 2010 - 2017
    maps = json.loads(requests.get("%s&since=%s" % (api_url, date.replace("x", str(year)))).text)  # noqa
    n_maps = len(maps)
    for i, bmap in enumerate(maps):
        print("Map %d/%d (201%d)" % (i + 1, n_maps, year))
        if bmap["approved"] not in ["4", "2", "1"]:  # loved, approved, ranked
            print("Not ranked")
            continue
        b_id = bmap["beatmap_id"]
        if b_id in beatmaps:
            continue
        try:
            text = requests.get("%s%s" % (osu_url, b_id)).text
            website = requests.get("%s%s" % (site_url, b_id)).text
        except requests.RequestException:
            print("Request error")
            continue
        if not website.startswith("\n"):
            # Some new maps are only available on the new site,
            # whose HTML I don't feel like looking at.
            print("New site")
            continue
        combo_matches = combo_regex.findall(website)
        if not combo_matches:
            print("No combo match")
            continue
        misses_matches = misses_regex.findall(website)
        if not misses_matches:
            print("No misses match")
            continue
        if misses_matches[0] != "0":
            print("Not FC")
            continue
        # Scan for the [HitObjects] section header; the for/else's else branch
        # only runs if the loop finished without a break, i.e. no header was found.
        for idx, line in enumerate(text.split("\n")):
            if "[HitObjects]" in line:
                break
        else:
            print("No hitobjects")
            continue
        beatmaps[b_id] = {}
        beatmaps[b_id]["combo"] = int(combo_matches[0])
        beatmaps[b_id]["sliders"] = []
        objects = 0
        for line in text.split("\n")[idx+1:]:
            if not line.strip():  # Skip blank lines so they aren't counted as objects.
                continue
            if "|" in line:  # A slider.
                beatmaps[b_id]["sliders"].append(line.strip())
            else:  # I don't think we can account for spinners unfortunately.
                objects += 1
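        # For reference, hit object lines in the .osu format are comma-separated,
        # and only sliders carry a "|"-delimited curve spec. Illustrative lines
        # (not taken from a real map):
        #   circle:  256,192,1000,1,0
        #   slider:  256,192,1000,2,0,B|320:240|380:160,1,140
        #   spinner: 256,192,1000,12,0,3000
        # So the "|" check above counts spinners together with circles.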
        if not beatmaps[b_id]["sliders"]:
            print("No sliders")
            beatmaps.pop(b_id)
        else:
            beatmaps[b_id]["objs"] = objects
with open("maps.json", "w") as f:
    json.dump(beatmaps, f)
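# maps.json maps each beatmap_id to {"combo": ..., "sliders": [...], "objs": ...},
# presumably so the slow scrape doesn't need to be repeated to recompute the stats.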
slidercombos = [(b["combo"] - b["objs"]) / len(b["sliders"]) for b in beatmaps.values()] # noqa
print("Mean: %f" % statistics.mean(slidercombos))
print("Median: %f" % statistics.median(slidercombos))
print("STD: %f" % statistics.stdev(slidercombos))
# Mean: 2.442353
# Median: 2.333743
# STD: 0.414389
# What have we learned?
# This tells us that sliders tend to be short.
# The minimum combo of a slider is 2, and so we can see that most
# sliders stick to this minimum. A quick AT playthrough of a few
# CTB maps confirms this as well.
# We can also see that long, slow sliders pull the mean up a little bit,
# while there's no possibility for low outliers.
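# As a rough application of the estimate (made-up counts): a catch map with
# 400 non-slider objects and 150 sliders would be expected to have a max combo
# of about 400 + 150 * 2.44 ≈ 766.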