Created
November 19, 2021 05:02
-
-
Save MiscellaneousStuff/509303436e887ba0a4c2732511ad23a2 to your computer and use it in GitHub Desktop.
replay_scraper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import concurrent.futures | |
import requests | |
import time | |
import json | |
import os | |
# Maximum number of worker threads used for the concurrent HTTP requests.
CONNECTIONS = 10

# Maps champion name -> numeric champion id, loaded from "champ_ids.txt".
# Each non-blank line of that file is parsed as "<id>:<name>".
champ_ids = {}
with open("champ_ids.txt") as f:
    for line in f:
        # Skip blank lines (typically the one left by a trailing newline);
        # they have no ":" separator and would raise IndexError below.
        if not line.strip():
            continue
        fields = line.split(":")
        champ_ids[fields[1].strip()] = int(fields[0])
def handle_req(url, body):
    """POST ``body`` to ``url`` as a JSON document and return the response.

    Args:
        url: Endpoint to send the request to.
        body: JSON-serialisable object used as the request payload.

    Returns:
        The ``requests`` response object for the POST.
    """
    payload = json.dumps(body)
    json_headers = {"Content-Type": "application/json"}
    response = requests.post(url, data=payload, headers=json_headers)
    # Pause after every request before handing the response back
    # (presumably to throttle the worker threads -- confirm intent).
    time.sleep(0.5)
    return response
def get_leaderboard(page_start=1, page_end=1, region="euw1"):
    """Fetch ranked solo/duo leaderboard pages and return their players.

    One request per page is sent concurrently (up to ``CONNECTIONS`` at a
    time). Pages whose request or response parsing fails are reported on
    stdout and skipped, so the result only ever contains player records.

    Args:
        page_start: First page index of the range (inclusive).
        page_end: Last page index of the range (inclusive).
        region: Region id sent to the API (EUW = "euw1", NA = "na1").

    Returns:
        A list with the ``players`` entries of every page that was fetched
        successfully, in completion order (not necessarily page order).
    """
    leaderboard_url = "https://u.gg/api"
    query = "query getRankedLeaderboard($page: Int, $queueType: Int, $regionId: String!) {\n leaderboardPage(page: $page, queueType: $queueType, regionId: $regionId) {\n totalPlayerCount\n topPlayerMostPlayedChamp\n players {\n iconId\n losses\n lp\n overallRanking\n rank\n summonerLevel\n summonerName\n tier\n wins\n __typename\n }\n __typename\n }\n}\n"

    def leaderboard_req_body(p):
        """Build the GraphQL request body for leaderboard page ``p``."""
        return {
            "operationName": "getRankedLeaderboard",
            "query": query,
            "variables": {
                "page": p,
                "queueType": 420,  # Ranked Solo/Duo
                "regionId": region,
            },
        }

    players = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=CONNECTIONS) as executor:
        # NOTE(review): the page number sent is ``page + 1``, i.e. pages
        # page_start+1 .. page_end+1 are requested. Confirm whether the API
        # is 0- or 1-based; if it is 1-based the first page is never fetched.
        futures = [
            executor.submit(
                handle_req,
                leaderboard_url,
                leaderboard_req_body(page + 1),
            )
            for page in range(page_start, page_end + 1)
        ]
        for future in concurrent.futures.as_completed(futures):
            # Best-effort scraping: any failure (network error, invalid JSON,
            # unexpected response shape) drops this page only.
            try:
                response = future.result()
                payload = json.loads(response.content)
                page_players = payload["data"]["leaderboardPage"]["players"]
            except Exception as exc:
                print("This leaderboard page failed", type(exc))
                continue
            # The API may answer with a null player list; treat it as empty.
            players.extend(page_players or [])
    return players
def get_matches(summoner_names, champs, target_patch, outfile="", win_only=False, region="euw1"):
    """Collect match ids played by the given summoners on the given champions.

    One request per summoner is sent concurrently (up to ``CONNECTIONS`` at
    a time). Only the first result page is requested, so at most one page of
    matches per summoner is inspected. Summoners whose request or response
    parsing fails are reported on stdout and skipped.

    Args:
        summoner_names: Iterable of summoner names to query.
        champs: Champion names (keys of ``champ_ids``); matches on any of
            them are requested.
        target_patch: Only matches whose ``version`` field equals this
            string are kept.
        outfile: Optional path. When non-empty the file is overwritten with
            a three-line header (patch, champions, summoner count) and every
            kept match id is appended, one per line.
        win_only: When true, keep only matches whose ``win`` field is truthy.
        region: Region id sent to the API.

    Returns:
        The set of kept match ids.

    Raises:
        KeyError: If a name in ``champs`` is not present in ``champ_ids``.
    """
    matches_url = "https://u.gg/api"
    query = "query FetchMatchSummaries($championId: [Int], $page: Int, $queueType: [Int], $regionId: String!, $role: [Int], $seasonId: Int!, $summonerName: String!) {\n fetchPlayerMatchSummaries(\n championId: $championId\n page: $page\n queueType: $queueType\n regionId: $regionId\n role: $role\n seasonId: $seasonId\n summonerName: $summonerName\n ) {\n finishedMatchSummaries\n totalNumMatches\n matchSummaries {\n assists\n championId\n cs\n damage\n deaths\n gold\n items\n jungleCs\n killParticipation\n kills\n level\n matchCreationTime\n matchDuration\n matchId\n maximumKillStreak\n primaryStyle\n queueType\n regionId\n role\n runes\n subStyle\n summonerName\n summonerSpells\n psHardCarry\n psTeamPlay\n lpInfo {\n lp\n placement\n promoProgress\n promoTarget\n promotedTo {\n tier\n rank\n __typename\n }\n __typename\n }\n teamA {\n championId\n summonerName\n teamId\n role\n hardCarry\n teamplay\n __typename\n }\n teamB {\n championId\n summonerName\n teamId\n role\n hardCarry\n teamplay\n __typename\n }\n version\n visionScore\n win\n __typename\n }\n __typename\n }\n}\n"

    # Resolve the champion ids once, up front, so an unknown champion name
    # fails immediately instead of in the middle of submitting requests.
    champion_ids = [champ_ids[c] for c in champs]

    def matches_req_body(summoner_name):
        """Build the GraphQL request body for one summoner."""
        return {
            "operationName": "FetchMatchSummaries",
            "query": query,
            "variables": {
                "championId": champion_ids,
                # Only the first page is requested: people rarely play more
                # games of one champion per patch than a single page holds,
                # and skipping pagination keeps the code much simpler.
                "page": 1,
                "queueType": [420],  # 420 = solo/duo
                "regionId": region,
                "role": [],
                "seasonId": 16,
                "summonerName": summoner_name,
            },
        }

    if outfile:
        # Mode "w" discards any previous output before the header is written.
        with open(outfile, "w") as f:
            f.write(target_patch + "\n")
            f.write(",".join(champs) + "\n")
            f.write(f"top {len(summoner_names)} ranked summoners\n")

    match_ids = set()
    with concurrent.futures.ThreadPoolExecutor(max_workers=CONNECTIONS) as executor:
        futures = [
            executor.submit(handle_req, matches_url, matches_req_body(name))
            for name in summoner_names
        ]
        for future in concurrent.futures.as_completed(futures):
            # Best-effort scraping: any failure (network error, invalid JSON,
            # unexpected response shape) drops this summoner only.
            try:
                response = future.result()
                payload = json.loads(response.content)
                summaries = payload["data"]["fetchPlayerMatchSummaries"]["matchSummaries"]
            except Exception as exc:
                print('This replay failed', type(exc))
                continue

            # A null summary list is treated as "no matches".
            kept = [
                match["matchId"]
                for match in summaries or []
                if match["version"] == target_patch
                and (not win_only or match["win"])
            ]
            if not kept:
                continue
            match_ids.update(kept)
            if outfile:
                # One append per response rather than one per match.
                with open(outfile, "a") as f:
                    f.writelines(f"{match_id}\n" for match_id in kept)
    return match_ids
def get_replays(start_idx=1, end_idx=1, win_only=False, champs=None, target_patch="11_21"):
    """Find game ids of leaderboard players' matches and save them to a file.

    Fetches leaderboard pages ``start_idx`` .. ``end_idx``, looks up each
    player's matches on ``champs`` for ``target_patch`` and finally rewrites
    the output file so that it holds just the unique game ids, one per line.

    The output file name is derived from the arguments, e.g.
    ``Miss-Fortune_100(1-1)_win_or_loss.txt``; the player count in the name
    assumes 100 players per leaderboard page.

    Args:
        start_idx: First leaderboard page index (inclusive).
        end_idx: Last leaderboard page index (inclusive).
        win_only: When true, only won matches are collected.
        champs: Champion names to search for (logical OR). Defaults to
            ``["Miss Fortune"]``.
        target_patch: Patch version string the matches must have.

    Returns:
        The set of unique game ids, as strings.
    """
    if champs is None:
        champs = ["Miss Fortune"]

    fname_champs = ",".join(c.replace(" ", "-") for c in champs)
    fname_nums = f"{((end_idx - start_idx) + 1) * 100}({start_idx}-{end_idx})"
    fname_win_only = "win_only" if win_only else "win_or_loss"
    fname = f"{fname_champs}_{fname_nums}_{fname_win_only}.txt"

    leaderboard = get_leaderboard(page_start=start_idx, page_end=end_idx)
    players = [p["summonerName"] for p in leaderboard]
    matches = get_matches(
        summoner_names=players,
        champs=champs,  # Logical OR search for these champs
        target_patch=target_patch,
        outfile=fname,
        win_only=win_only,
    )
    print("match count:", len(matches))

    # Read the ids back from the file get_matches wrote. The first three
    # lines are its header; blank lines (such as the empty string left after
    # the final newline) are not game ids and must not be counted.
    with open(fname, "r") as f:
        lines = f.read().split("\n")
    game_ids = {line.strip() for line in lines[3:] if line.strip()}

    # Replace the file contents with the de-duplicated ids only, sorted so
    # the output is deterministic.
    with open(fname, "w") as f:
        f.writelines(f"{game_id}\n" for game_id in sorted(game_ids))
    return game_ids
if __name__ == "__main__":
    # Script entry point: scan leaderboard pages 1..360 for matches played
    # on either champion, regardless of outcome, and report how many unique
    # game ids were found.
    start_idx, stop_idx = 1, 360
    win_only = False
    champs = ["Miss Fortune", "Nami"]
    game_ids = get_replays(
        start_idx=start_idx,
        end_idx=stop_idx,
        win_only=win_only,
        champs=champs,
    )
    print('Number of replays to DL:', len(game_ids))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Generate a list of the top 36,000 players on the EUW leaderboard and get the game IDs of their matches that meet specific criteria.