Skip to content

Instantly share code, notes, and snippets.

@0e4ef622
Created February 5, 2020 07:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 0e4ef622/f10568bc3e937e88bb90aec3c49de5b3 to your computer and use it in GitHub Desktop.
Save 0e4ef622/f10568bc3e937e88bb90aec3c49de5b3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import os
import re
import requests
from time import sleep
# Two-letter codes whose rankings are downloaded, in processing order.
# Each code is used both as the `country` query parameter of the ranking URL
# and as the name of the directory that stores that country's pages.
countries = (
    "US RU DE PL FR JP CA BR GB TW "
    "KR CN AU ID UA PH CL FI AR NL "
    "SE SG MX MY ES IT HK TH VN NO "
    "CZ TR BY AT BE PT RO DK HU LT "
    "KZ NZ PE CH CO IL EE BG SK LV "
    "GR VE RS IE HR SA UY ZA AE SI "
    "IN EC MA CR MD DO BN EG RE TN "
    "MO PA DZ MN PY KW PR BO GE SV "
    "QA GT LU UZ KG MK KH IS BA JO "
    "PK TT HN CY NI MV BH BD PF LB "
    "IQ GU AZ NP AL MT MM OM NC IR "
    "AM GP MQ JM LA LK PS MU FO SY "
    "JE ME GF IM SR MP BZ BB LY AW "
    "AX SD GG MG LI BS KE GL BM EU "
    "CI GY LC AD GI VI TJ AG PM SN "
    "CK AQ CF LR ER"
).split()
def get_user_ids(text):
    """Return every osu! user id linked from ``text``.

    Args:
        text: String to scan (the caller passes the body of a rankings
            page) for ``https://osu.ppy.sh/users/<digits>`` links.

    Returns:
        A list with the digit portion of each matching link, as strings,
        in order of appearance. A link that occurs several times yields its
        id several times; no match yields an empty list.
    """
    # Raw string so the regex escapes are written once instead of doubled.
    return re.findall(r"https://osu\.ppy\.sh/users/(\d+)", text)
def mkdir(path):
    """Create the directory ``path``, tolerating one that already exists.

    Args:
        path: Directory to create. Parent directories are not created.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
        OSError: Any other failure reported by ``os.mkdir`` (for example a
            missing parent directory).
    """
    try:
        os.mkdir(path)
    except FileExistsError:
        # An existing directory is the expected "already done" case. Anything
        # else occupying the name would only fail later and less clearly, so
        # surface it here.
        if not os.path.isdir(path):
            raise
def _fetch_ranking_page(country, page):
    """Download one page of a country's osu! performance ranking.

    Keeps retrying for as long as the server answers HTTP 429, waiting
    10 seconds between attempts. Redirects are not followed.

    Args:
        country: Code sent as the ``country`` query parameter.
        page: 1-based page number sent as the ``page`` query parameter.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: For a 4xx/5xx status other than 429.
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    url = "https://osu.ppy.sh/rankings/osu/performance?country=%s&page=%d" % (country, page)
    while True:
        # The timeout keeps a stalled connection from hanging the whole run;
        # pages already saved are skipped when the script is restarted.
        response = requests.get(url, allow_redirects=False, timeout=30)
        if response.status_code != 429:
            response.raise_for_status()
            return response.text
        print("\nRate limited (HTTP 429), waiting 10 seconds")
        sleep(10)


def process_country(country):
    """Save the user ids of up to 200 ranking pages for ``country``.

    Page ``i`` is stored in the file ``<country>/page<iii>`` (relative to the
    current working directory, which must already contain the ``<country>``
    directory), one user id per line. Pages whose file already exists are
    read back instead of downloaded, so an interrupted run can be resumed.

    Processing stops early when the previous page held fewer than 50 ids,
    or when a downloaded page contains exactly the same ids as the previous
    one.

    Args:
        country: Code used both as the directory name and as the ``country``
            query parameter.

    Returns:
        None.

    Raises:
        requests.HTTPError: For a 4xx/5xx response other than 429.
        requests.Timeout: If a request gets no answer within 30 seconds.
        OSError: If a page file cannot be read or created.
    """
    # Placeholder of full-page length so the "short page" check below does
    # not fire before the first page has been looked at.
    prev_ids = [0] * 50
    for i in range(1, 201):
        if len(prev_ids) < 50:
            # The previous page was not full, so it was the last one.
            print("\rProcessing %s... %d/%d " % (country, i-1, i-1))
            return
        print("\rProcessing %s... %d/200 " % (country, i), end='')

        page_path = "%s/page%03d" % (country, i)
        if os.path.exists(page_path):
            print(" %s exists, skipping..." % page_path)
            with open(page_path, "r") as f:
                prev_ids = [line.strip() for line in f]
            continue

        user_ids = get_user_ids(_fetch_ranking_page(country, i))
        if user_ids == prev_ids:
            # Same ids as the page before: treated as "no further pages".
            print("\rProcessing %s... %d/%d " % (country, i-1, i-1))
            return
        prev_ids = user_ids
        # Mode "x" refuses to overwrite a file that appeared in the meantime.
        with open(page_path, "x") as f:
            f.write("\n".join(user_ids))
            f.write("\n")
        # NOTE(review): the original indentation was lost; this pause is
        # assumed to apply only after an actual download, not after a
        # skipped page. Confirm against the author's intent.
        sleep(0.5)
    print()
# Script body: everything is stored under ./country_leaderboard_pages, with
# one sub-directory per country code. Both mkdir() calls tolerate existing
# directories, so the script can be re-run to resume a partial download.
mkdir("country_leaderboard_pages")
# process_country() uses paths relative to the working directory.
os.chdir("country_leaderboard_pages")
for country in countries:
    mkdir(country)
    # "\r" with end='' lets process_country() overwrite this status line.
    print("\rProcessing %s..." % country, end='')
    process_country(country)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment