Last active
November 25, 2021 18:25
-
-
Save kristiker/d27c8db64b889803c913564911155f2d to your computer and use it in GitHub Desktop.
Analyse your CS:GO Match History Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Analyse your CS:GO Match History Data

Original by DrMarioCSGO
https://pastebin.com/v5EVZMTq
https://www.reddit.com/r/GlobalOffensive/comments/qvdf4v/

Requirements:
    pip install pandas
    pip install matplotlib
    pip install html5lib
    pip install bs4

Usage:
    Load your full match history in the browser (this can be automated
    with Ban Checker for Steam) and save the page with Ctrl+S. Put this
    Python file in the same directory as the saved page, enter your
    nickname at the top (the `me` variable) and run the script
    (it might take a minute).
"""
import pandas as pd | |
import matplotlib.pyplot as plt | |
import matplotlib.dates as mdates | |
import matplotlib as mpl | |
import numpy as np | |
from collections import Counter, OrderedDict | |
import pprint | |
from datetime import datetime | |
import re | |
import pickle | |
import unicodedata | |
import warnings | |
###########
## Enter your in-game nick here. generator() looks this exact string up in
## each scoreboard's "Player Name" column, so it has to match character for
## character.
me = "ϟ kристи'"
# Saved "Personal Game Data" page that generator() opens and parses.
file_path = "Steam Community __ Counter-Strike_ Global Offensive __ Personal Game Data.html"
# Nicknames with special characters cause missing-glyph warnings when plotting;
# you can silence them here.
# NOTE(review): the message below names code point 119967 only -- warnings
# about any other glyph are presumably still shown; confirm that is intended.
warnings.filterwarnings("ignore", message="Glyph 119967 missing from current font")
def _is_full_length_match(score_row):
    """Return True if a "<a> : <b>" score row belongs to a match that is kept.

    A match is kept when one side reached 16 rounds or when it ended
    15 : 15; everything else (surrenders, short matches) is dropped.
    The comparison is done on the score strings, not on integers.
    """
    parts = score_row.split()
    left, right = parts[0], parts[2]
    if left == "15" or right == "15":
        # 15 rounds only counts as a finished game when both sides have 15.
        return left == right
    return left == "16" or right == "16"


def _clock_to_seconds(clock):
    """Convert a clock string to an int: first two chars * 60 + chars 3-4.

    Presumably the input is "MM:SS" and the result is in seconds.
    """
    return int(clock[:2]) * 60 + int(clock[3:5])


def _parse_info_table(info):
    """Extract map, start time, wait time and duration from a match-info table.

    Reads column 0: row 0 is "Competitive <map>", row 1 a timestamp in
    "%Y-%m-%d %H:%M:%S %Z" format, rows 3 and 4 carry the wait time and the
    match duration as the third whitespace-separated token.
    """
    return {
        "world": info.at[0, 0].split(' ', 1)[1],  # drop the leading "Competitive"
        "time": datetime.strptime(info.at[1, 0], "%Y-%m-%d %H:%M:%S %Z"),
        "waittime": _clock_to_seconds(info.at[3, 0].split()[2]),
        "duration": _clock_to_seconds(info.at[4, 0].split()[2]),
    }


def _parse_scoreboard(board):
    """Extract players, outcome, result and personal stats from a scoreboard.

    Rows 0-4 are the first team, row 5 holds the score ("<a> : <b>") and
    rows 6-10 are the second team.

    Raises:
        ValueError: if the nickname stored in `me` is not on the scoreboard.
    """
    player_rows = list(range(0, 5)) + list(range(6, 11))
    players = [board.at[row, "Player Name"] for row in player_rows]

    result = board.at[5, "Player Name"]
    score_parts = result.split()
    team1 = int(score_parts[0])
    team2 = int(score_parts[2])

    try:
        my_position = players.index(me)
    except ValueError:
        raise ValueError(
            "nickname {!r} not found on scoreboard {!r}; "
            "check the 'me' variable".format(me, players)
        ) from None
    on_first_team = my_position <= 4

    if team1 == team2:
        outcome = "draw"
    elif (team1 > team2) == on_first_team:
        outcome = "won"
    else:
        outcome = "loss"

    if team1 < team2:
        # Store the higher score first (16:2 == 2:16) for the later analysis.
        result = str(team2) + " : " + str(team1)

    # The score row sits between the teams, so second-team players are one
    # DataFrame row further down than their position in `players`.
    my_row = my_position + 1 if my_position > 4 else my_position

    # TODO: the "★" (MVPs) and "HSP" columns are not read yet; empty cells come
    # back as NaN (a float) and must be handled before calling .strip().
    return {
        "players": players,
        "outcome": outcome,
        "result": result,
        "ping": int(board.at[my_row, "Ping"]),
        "kills": int(board.at[my_row, "K"]),
        "assists": int(board.at[my_row, "A"]),
        "deaths": int(board.at[my_row, "D"]),
        "score": int(board.at[my_row, "Score"]),
    }


def generator():
    """Parse the saved match-history page into a list of match dictionaries.

    Reads `file_path`, which yields alternating tables: a match-info table
    (even index) followed by its scoreboard (odd index). "Competitive Mirage
    Scrimmage" matches and matches that did not run to a full result are
    dropped.

    Returns:
        list[dict]: one dict per kept match with the keys "world", "time",
        "waittime", "duration", "players", "outcome", "result", "ping",
        "kills", "assists", "deaths" and "score".

    Raises:
        ValueError: if the nickname in `me` is missing from a scoreboard.
    """
    # Generating list of dataframes. The open file is handed to pandas
    # directly; passing the raw markup string is deprecated in newer pandas.
    with open(file_path, "r", encoding="utf8") as f:
        data = pd.read_html(
            f,
            flavor="bs4",
            attrs={"class": ["csgo_scoreboard_inner_left", "csgo_scoreboard_inner_right"]},
        )

    # Remove surrenders, short matches and scrimmage mirage. A set is used so
    # that a match flagged by both checks is removed only once; deleting the
    # same index twice would take an unrelated table with it.
    discard = set()
    for idx in range(len(data)):
        if idx % 2 == 0:
            if data[idx].at[0, 0] == "Competitive Mirage Scrimmage":
                discard.update((idx, idx + 1))
        elif not _is_full_length_match(data[idx].at[5, "Player Name"]):
            discard.update((idx - 1, idx))
    data = [table for idx, table in enumerate(data) if idx not in discard]

    # Generating match dictionaries from each (info table, scoreboard) pair
    matches = []
    for start in range(0, len(data) - 1, 2):
        match = _parse_info_table(data[start])
        match.update(_parse_scoreboard(data[start + 1]))
        matches.append(match)
    return matches
## The following statements only need to run the first time: they parse the
## saved page and cache the result in save.p. Afterwards you can comment them
## out and uncomment the pickle.load block below for a faster start-up.
matches = generator()
# Use a context manager so the cache file is flushed and closed right away.
with open("save.p", "wb") as cache_file:
    pickle.dump(matches, cache_file)
#with open("save.p", "rb") as cache_file:
#    matches = pickle.load(cache_file)
#pprint.pprint(matches)
def debugging(data=None):
    """Print every parsed table with its index, for inspecting the raw input.

    Args:
        data: sequence of tables to print. When omitted, the tables are read
            from `file_path` with the same pandas.read_html settings that
            generator() uses (the list built inside generator() is local to
            it and cannot be reached from here).
    """
    if data is None:
        with open(file_path, "r", encoding="utf8") as f:
            data = pd.read_html(
                f,
                flavor="bs4",
                attrs={"class": ["csgo_scoreboard_inner_left", "csgo_scoreboard_inner_right"]},
            )
    # Show all columns on one line instead of wrapping/truncating wide frames.
    pd.set_option("display.max_columns", 101)
    pd.set_option('display.expand_frame_repr', False)
    for i, table in enumerate(data):
        print("####### "+str(i)+" #######")
        print(table)
        print("\n\n")
#debugging()
def analysing_matchtimes():
    """Scatter-plot the time of day of every match against its date.

    Reads the module-level `matches` list. Losses are drawn red, wins green
    and every other outcome magenta. Opens a matplotlib window via plt.show().
    """
    dates = [mdates.date2num(match["time"]) for match in matches]  # all datetimes
    # Keep only the fractional (time-of-day) part and pin it to one common day.
    times = [date % 1 + int(dates[0]) for date in dates]

    # Collect the points per marker style so each outcome becomes a single
    # artist instead of one artist per match.
    marker_for = {"loss": "ro", "won": "go"}
    grouped = {}
    for date, time_of_day, match in zip(dates, times, matches):
        fmt = marker_for.get(match["outcome"], "mo")
        xs, ys = grouped.setdefault(fmt, ([], []))
        xs.append(date)
        ys.append(time_of_day)

    fig, ax = plt.subplots()
    for fmt, (xs, ys) in grouped.items():
        # Axes.plot_date is deprecated; plain plot + xaxis_date() replaces it.
        ax.plot(xs, ys, fmt, markersize=2)
    ax.xaxis_date()
    ax.yaxis_date()
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    fig.autofmt_xdate()
    plt.show()
analysing_matchtimes()
def analysing_matches(StatOfInterest):
    """Plot one statistic of every match over calendar time.

    Args:
        StatOfInterest: a key of the match dictionaries (e.g. "ping"), or
            "kd" for kills divided by deaths (deaths clamped to at least 1).

    Reads the module-level `matches` list, draws one marker per match
    coloured by outcome plus a 50-match moving average, and opens a
    matplotlib window via plt.show().
    """
    def stat_of(match):
        if StatOfInterest == "kd":
            return match["kills"] / max(match["deaths"], 1)  # k/d
        return match[StatOfInterest]

    dates = [mpl.dates.date2num(match["time"]) for match in matches]
    values = [stat_of(match) for match in matches]

    # (format string, legend suffix) per outcome; anything else is a draw
    styles = {"loss": ('ro', " of Loss"), "won": ('go', " of Win")}
    fig, ax = plt.subplots()
    for date, value, match in zip(dates, values, matches):
        fmt, suffix = styles.get(match["outcome"], ('mo', " of Draw"))
        ax.plot(date, value, fmt, markersize=2, label=StatOfInterest + suffix)

    # Moving average: pad with the edge values so the smoothed series has
    # the same length as the input.
    window = 50
    lead = window // 2
    padded = np.pad(values, (lead, window - 1 - lead), mode='edge')
    smoothed = np.convolve(padded, np.ones((window,)) / window, mode='valid')
    plt.plot(dates, smoothed, "b-", label="Moving average")

    plt.axvline(datetime(2020, 9, 11))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.title(StatOfInterest + " of matches throughout the years")
    plt.xlabel("Date")
    plt.ylabel(StatOfInterest)

    # Every marker carries a label; keep one legend entry per distinct label.
    handles, labels = plt.gca().get_legend_handles_labels()
    unique_entries = dict(zip(labels, handles))
    plt.legend(unique_entries.values(), unique_entries.keys())
    fig.autofmt_xdate()
    plt.show()
# "assists" is deliberately left out (boring)
for stat in ("waittime", "duration", "ping", "kills", "deaths", "kd"):
    analysing_matches(stat)
def analysing_matches_by_daytime(StatOfInterest):
    """Plot one statistic of every match against the time of day it started.

    Args:
        StatOfInterest: a key of the match dictionaries (e.g. "ping"), or
            "kd" for kills divided by deaths (deaths clamped to at least 1).

    Reads the module-level `matches` list, draws one marker per match
    coloured by outcome plus a 50-match moving average taken in time-of-day
    order, and opens a matplotlib window via plt.show().
    """
    def stat_of(match):
        if StatOfInterest == "kd":
            return match["kills"] / max(match["deaths"], 1)  # k/d
        return match[StatOfInterest]

    dates = [mpl.dates.date2num(match["time"]) for match in matches]
    # Fractional part = time of day; shift everything onto one common day.
    day_times = [date % 1 + int(dates[0]) for date in dates]
    values = [stat_of(match) for match in matches]

    # (format string, legend suffix) per outcome; anything else is a draw
    styles = {"loss": ('rx', " of Loss"), "won": ('gx', " of Win")}
    fig, ax = plt.subplots()
    for day_time, value, match in zip(day_times, values, matches):
        fmt, suffix = styles.get(match["outcome"], ('yx', " of Draw"))
        ax.plot(day_time, value, fmt, markersize=2, label=StatOfInterest + suffix)

    # The moving average has to run along the time-of-day axis, so order the
    # (time, value) pairs by time first.
    ordered = sorted(zip(day_times, values))
    sorted_times, sorted_values = zip(*ordered)
    window = 50
    lead = window // 2
    padded = np.pad(sorted_values, (lead, window - 1 - lead), mode='edge')
    smoothed = np.convolve(padded, np.ones((window,)) / window, mode='valid')
    plt.plot(sorted_times, smoothed, "b-", label="Moving average")

    #plt.axvline(datetime(2020, 9, 11))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H-%M'))
    plt.title(StatOfInterest + " of matches throughout time of day")
    plt.xlabel("Time of day")
    plt.ylabel(StatOfInterest)

    # Every marker carries a label; keep one legend entry per distinct label.
    handles, labels = plt.gca().get_legend_handles_labels()
    unique_entries = dict(zip(labels, handles))
    plt.legend(unique_entries.values(), unique_entries.keys())
    fig.autofmt_xdate()
    plt.show()
# "assists" is deliberately left out (boring)
for stat in ("waittime", "duration", "ping", "kills", "deaths", "kd"):
    analysing_matches_by_daytime(stat)
def analysing_players(top_x_players,readout="no"):
    """Bar-chart how often each player name appears across all matches.

    Args:
        top_x_players: how many of the most frequent names to consider.
        readout: pass "yes" to also pretty-print the plotted ranking.

    Returns:
        list[tuple[str, int]]: the `top_x_players` most common
        (name, appearances) pairs, most frequent first. The first entry is
        skipped in the chart and its count is used in the title as the total
        number of matches (it is expected to be your own nickname).

    Reads the module-level `matches` list and opens a matplotlib window.
    NOTE(review): every name in a match's "players" list is counted, which
    appears to include both teams -- confirm whether only teammates are meant.
    """
    appearances = Counter(
        player for match in matches for player in match["players"]
    )
    ranking = appearances.most_common(top_x_players)
    others = ranking[1:]  # everything after the top entry
    if readout == "yes":
        pprint.pprint(others)

    names = [name for name, _ in others]
    counts = [count for _, count in others]

    fig, ax = plt.subplots()
    bar_width = 0.85
    positions = np.arange(len(counts))
    ax.barh(positions, counts, bar_width)
    ax.set_yticks(positions)
    ax.set_yticklabels(names)
    plt.tight_layout()
    # Write the count next to the end of each bar.
    for row, count in enumerate(counts):
        plt.text(count, row, " "+str(count), color='blue', va='center')
    plt.subplots_adjust(top=0.9, right=0.9)
    plt.title('Number of times a teammate joined your ' +
              str(ranking[0][1]) + " matches")
    plt.show()
    return ranking
analysing_players(25,"yes")
def analysing_players_cumsum():
    """Plot, per frequent player, the running count of shared matches over time.

    Reads the module-level `matches` list, prints the time of the most recent
    match, and draws one cumulative line for each of the 25 most common
    player names except the top one (expected to be your own nickname).
    Opens a matplotlib window via plt.show().
    """
    top_x_players = 25  # how many players to look at
    latest_datetime = max(match["time"] for match in matches)
    print(latest_datetime)

    # Rank the players here instead of calling analysing_players(): that
    # function draws and shows its own bar chart, which would pop up a second
    # window and block before this figure has been drawn.
    appearances = Counter(
        player for match in matches for player in match["players"]
    )
    ranking = appearances.most_common(top_x_players)

    fig, ax = plt.subplots()
    for name, _count in ranking[1:]:  # skip "me"
        # Sort explicitly so the running total is chronological regardless of
        # the order in which the matches were parsed.
        joined = sorted(
            match["time"] for match in matches if name in match["players"]
        )
        xs = joined + [latest_datetime]  # extend the line to the end of the diagram
        steps = np.cumsum([1] * len(joined) + [0])
        ax.plot(xs, steps, '-', label=name)
    ax.legend()
    ax.set_title('Number of times a mate joined you for a match over time')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    fig.autofmt_xdate()
    plt.show()
analysing_players_cumsum()
def analysing_results():
    """Bar-chart how often each final score occurred.

    Reads the module-level `matches` list, pretty-prints the
    (result, count) pairs ordered from '16 : 0' to '15 : 15', and opens a
    matplotlib window via plt.show(). A result outside that range raises
    KeyError.
    """
    tally = Counter(match["result"] for match in matches)
    ## To look at a single map only, count with a filter instead, e.g.
    ## Counter(m["result"] for m in matches if m["world"] == "Nuke")

    # Display position of each result: by rounds conceded, the draw last.
    position_of = {"16 : " + str(lost): lost for lost in range(15)}
    position_of["15 : 15"] = 15
    ordered = sorted(tally.most_common(), key=lambda entry: position_of[entry[0]])
    pprint.pprint(ordered)

    labels = [result for result, _ in ordered]
    counts = [count for _, count in ordered]

    fig, ax = plt.subplots()
    bar_width = 0.85
    positions = np.arange(len(counts))
    ax.barh(positions, counts, bar_width)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)
    plt.tight_layout()
    # Write the count next to the end of each bar.
    for row, count in enumerate(counts):
        plt.text(count, row, " "+str(count), color='blue', va='center')
    plt.subplots_adjust(top=0.9, right=0.9)
    plt.title('Number of times a specific result has occured')
    plt.show()
analysing_results()
def analysing_maps():
    """Bar-chart how often each map was played.

    Reads the module-level `matches` list, pretty-prints the (map, count)
    pairs from most to least played, and opens a matplotlib window via
    plt.show().
    """
    ranking = Counter(match["world"] for match in matches).most_common()
    pprint.pprint(ranking)

    map_names = [name for name, _ in ranking]
    counts = [count for _, count in ranking]

    fig, ax = plt.subplots()
    bar_width = 0.85
    positions = np.arange(len(counts))
    ax.barh(positions, counts, bar_width)
    ax.set_yticks(positions)
    ax.set_yticklabels(map_names)
    plt.tight_layout()
    # Write the count next to the end of each bar.
    for row, count in enumerate(counts):
        plt.text(count, row, " "+str(count), color='blue', va='center')
    plt.subplots_adjust(top=0.9, right=0.9)
    plt.title('Number of times a map has been played')
    plt.show()
analysing_maps()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment