Skip to content

Instantly share code, notes, and snippets.

@MiscellaneousStuff
Created November 19, 2021 05:02
Show Gist options
  • Save MiscellaneousStuff/509303436e887ba0a4c2732511ad23a2 to your computer and use it in GitHub Desktop.
Save MiscellaneousStuff/509303436e887ba0a4c2732511ad23a2 to your computer and use it in GitHub Desktop.
replay_scraper.py
import concurrent.futures
import requests
import time
import json
import os
# Size of the thread pools used to issue API requests concurrently.
CONNECTIONS = 10

# Maps champion name -> numeric champion ID, loaded from "champ_ids.txt".
# Each non-blank line of that file is parsed as "<id>:<name>".
champ_ids = {}
with open("champ_ids.txt") as f:
    for raw_line in f:
        # Skip blank lines (e.g. the one left by a trailing newline); they
        # have no ":" separator and would otherwise raise IndexError.
        if not raw_line.strip():
            continue
        fields = raw_line.split(":")
        champ_ids[fields[1].strip()] = int(fields[0])
def handle_req(url, body, timeout=30):
    """POST ``body`` as JSON to ``url`` and return the response.

    Args:
        url: Endpoint to send the request to.
        body: JSON-serialisable payload; encoded with ``json.dumps``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely, which would tie up a
            worker thread forever on a stalled connection.

    Returns:
        The ``requests.Response`` for the request.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout expires.
    """
    response = requests.request(
        'POST',
        url,
        data=json.dumps(body),
        headers={
            "Content-Type": "application/json"
        },
        timeout=timeout,
    )
    # Brief pause after every request (presumably to throttle calls to the
    # API -- confirm before changing).
    time.sleep(0.5)
    return response
def get_leaderboard(page_start=1, page_end=1, region="euw1"):
    """Fetch Ranked Solo/Duo leaderboard entries from the u.gg API.

    One request per page is submitted to a thread pool of ``CONNECTIONS``
    workers and the results are collected as they complete.

    Args:
        page_start: First page index of the range (inclusive).
        page_end: Last page index of the range (inclusive).
        region: Region ID sent to the API, e.g. "euw1" (EUW) or "na1" (NA).

    Returns:
        A list of the player dicts returned by the API, in the order the
        page requests completed. Pages whose request or parsing failed are
        reported on stdout and skipped.
    """
    leaderboard_url = "https://u.gg/api"
    query = "query getRankedLeaderboard($page: Int, $queueType: Int, $regionId: String!) {\n leaderboardPage(page: $page, queueType: $queueType, regionId: $regionId) {\n totalPlayerCount\n topPlayerMostPlayedChamp\n players {\n iconId\n losses\n lp\n overallRanking\n rank\n summonerLevel\n summonerName\n tier\n wins\n __typename\n }\n __typename\n }\n}\n"

    def build_body(page):
        """Return the GraphQL request body for one leaderboard page."""
        return {
            "operationName": "getRankedLeaderboard",
            "query": query,
            "variables": {
                "page": page,
                "queueType": 420,  # Ranked Solo/Duo
                "regionId": region,
            },
        }

    players = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=CONNECTIONS) as executor:
        # NOTE(review): the page number sent to the API is offset by one
        # (``page + 1``); confirm this matches the API's page indexing.
        futures = [
            executor.submit(handle_req, leaderboard_url, build_body(page + 1))
            for page in range(page_start, page_end + 1)
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                response = future.result()
                payload = json.loads(response.content)
                page_players = payload["data"]["leaderboardPage"]["players"]
            except Exception as exc:
                # Best effort: report the failed page and keep going. The
                # failure must not be merged into ``players`` -- extending a
                # list with an error string adds its individual characters.
                print('This leaderboard page failed', type(exc))
                continue
            # A null "players" field is treated as an empty page.
            players.extend(page_players or [])
    return players
def get_matches(summoner_names, champs, target_patch, outfile="", win_only=False, region="euw1"):
    """Collect match IDs played on ``champs`` by the given summoners.

    One match-summary request per summoner is submitted to a thread pool of
    ``CONNECTIONS`` workers. Only the first result page is requested per
    summoner, and only Ranked Solo/Duo games (queue 420) are queried.

    Args:
        summoner_names: Sized collection of summoner names to query.
        champs: Champion names (keys of ``champ_ids``); a match on any of
            them qualifies.
        target_patch: Only matches whose "version" equals this value are kept.
        outfile: Optional path. When non-empty the file is recreated with a
            three-line header (patch, champions, summoner count) and one
            match ID is appended per qualifying match entry.
        win_only: When true, keep only matches flagged as a win.
        region: Region ID sent to the API (defaults to "euw1").

    Returns:
        A set of the qualifying match IDs.

    Raises:
        KeyError: If a name in ``champs`` is not present in ``champ_ids``.
    """
    matches_url = "https://u.gg/api"
    query = "query FetchMatchSummaries($championId: [Int], $page: Int, $queueType: [Int], $regionId: String!, $role: [Int], $seasonId: Int!, $summonerName: String!) {\n fetchPlayerMatchSummaries(\n championId: $championId\n page: $page\n queueType: $queueType\n regionId: $regionId\n role: $role\n seasonId: $seasonId\n summonerName: $summonerName\n ) {\n finishedMatchSummaries\n totalNumMatches\n matchSummaries {\n assists\n championId\n cs\n damage\n deaths\n gold\n items\n jungleCs\n killParticipation\n kills\n level\n matchCreationTime\n matchDuration\n matchId\n maximumKillStreak\n primaryStyle\n queueType\n regionId\n role\n runes\n subStyle\n summonerName\n summonerSpells\n psHardCarry\n psTeamPlay\n lpInfo {\n lp\n placement\n promoProgress\n promoTarget\n promotedTo {\n tier\n rank\n __typename\n }\n __typename\n }\n teamA {\n championId\n summonerName\n teamId\n role\n hardCarry\n teamplay\n __typename\n }\n teamB {\n championId\n summonerName\n teamId\n role\n hardCarry\n teamplay\n __typename\n }\n version\n visionScore\n win\n __typename\n }\n __typename\n }\n}\n"

    if outfile:
        # Mode "w" discards any previous output file before the header.
        with open(outfile, "w") as f:
            f.write(target_patch + "\n")
            f.write(",".join(champs) + "\n")
            f.write(f"top {len(summoner_names)} ranked summoners\n")

    # The champion filter is identical for every request, so build it once.
    champion_filter = [champ_ids[c] for c in champs]

    def build_body(summoner_name):
        """Return the GraphQL request body for one summoner."""
        return {
            "operationName": "FetchMatchSummaries",
            "query": query,
            "variables": {
                "championId": champion_filter,
                # Only page 1 is requested: at most 20 games of a single
                # champ per patch are checked per summoner. People rarely
                # play more than that, and it keeps the code much simpler.
                "page": 1,
                "queueType": [420],  # 420 = solo/duo
                "regionId": region,
                "role": [],
                "seasonId": 16,
                "summonerName": summoner_name,
            },
        }

    match_ids = set()
    with concurrent.futures.ThreadPoolExecutor(max_workers=CONNECTIONS) as executor:
        futures = [
            executor.submit(handle_req, matches_url, build_body(name))
            for name in summoner_names
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                response = future.result()
                payload = json.loads(response.content)
                summaries = payload["data"]["fetchPlayerMatchSummaries"]["matchSummaries"]
            except Exception as exc:
                # Best effort: report the failed summoner and keep going.
                print('This replay failed', str(type(exc)))
                continue

            # A null "matchSummaries" field is treated as "no matches".
            summaries = summaries or []
            wanted = [
                match["matchId"]
                for match in summaries
                if match["version"] == target_patch
                and (not win_only or match["win"])
            ]
            match_ids.update(wanted)
            if outfile and wanted:
                # Append once per response instead of reopening the file for
                # every single match; results still reach disk progressively.
                with open(outfile, "a") as f:
                    f.writelines(f"{match_id}\n" for match_id in wanted)
    return match_ids
def get_replays(start_idx=1, end_idx=1, win_only=False, champs=None, target_patch="11_21"):
    """Gather the game IDs of matching replays and save them to a text file.

    Fetches leaderboard pages ``start_idx``..``end_idx``, queries every
    listed summoner's matches on ``champs`` for ``target_patch``, and
    rewrites the output file so that it holds one unique game ID per line.

    The output file is named
    ``<champs>_<count>(<start>-<end>)_<win_only|win_or_loss>.txt`` where
    spaces in champion names become "-" and ``<count>`` is the number of
    pages times 100 (assumes 100 players per leaderboard page -- TODO
    confirm).

    Args:
        start_idx: First leaderboard page index (inclusive).
        end_idx: Last leaderboard page index (inclusive).
        win_only: When true, keep only games the summoner won.
        champs: Champion names to search for (logical OR). Defaults to
            ``["Miss Fortune"]``.
        target_patch: Patch identifier passed to ``get_matches``.

    Returns:
        A set of game ID strings.
    """
    if champs is None:
        champs = ["Miss Fortune"]

    champ_part = ",".join(c.replace(" ", "-") for c in champs)
    page_count = (end_idx - start_idx) + 1
    count_part = f"{page_count * 100}({start_idx}-{end_idx})"
    win_part = "win_only" if win_only else "win_or_loss"
    fname = f"{champ_part}_{count_part}_{win_part}.txt"

    leaderboard = get_leaderboard(
        page_start=start_idx,
        page_end=end_idx)
    players = [p["summonerName"] for p in leaderboard]
    matches = get_matches(
        summoner_names=players,
        champs=champs,  # Logical OR search for these champs
        target_patch=target_patch,
        outfile=fname,
        win_only=win_only
    )
    print("match count:", len(matches))

    # Read back what get_matches wrote: three header lines, then one game ID
    # per line (possibly repeated). Blank entries -- such as the one left by
    # the trailing newline -- are dropped so they are neither counted nor
    # written out as IDs.
    with open(fname, "r") as f:
        lines = f.read().split("\n")
    game_ids = {line for line in lines[3:] if line}

    # Replace the file with just the de-duplicated IDs, in a stable order.
    with open(fname, "w") as f:
        f.writelines(game_id + "\n" for game_id in sorted(game_ids))
    return game_ids
if __name__ == "__main__":
    # Script entry point: scan leaderboard pages 1 through 360 for
    # Miss Fortune / Nami games, keeping wins and losses alike.
    champs = ["Miss Fortune", "Nami"]
    start_idx, stop_idx = 1, 360
    win_only = False

    game_ids = get_replays(
        start_idx=start_idx,
        end_idx=stop_idx,
        win_only=win_only,
        champs=champs,
    )
    print('Number of replays to DL:', len(game_ids))
@MiscellaneousStuff
Copy link
Author

Generates a list of the top 36,000 players on the EUW leaderboard and collects the game IDs of their matches that meet specific criteria (champion, patch, and optionally wins only).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment