Skip to content

Instantly share code, notes, and snippets.

@kristiker
Last active November 25, 2021 18:25
Show Gist options
  • Save kristiker/d27c8db64b889803c913564911155f2d to your computer and use it in GitHub Desktop.
Save kristiker/d27c8db64b889803c913564911155f2d to your computer and use it in GitHub Desktop.
Analyse your CS:GO Match History Data
"""
Analyse your CS:GO Match History Data
Original by DrMarioCSGO
https://pastebin.com/v5EVZMTq
https://www.reddit.com/r/GlobalOffensive/comments/qvdf4v/
Requirements:
pip install pandas
pip install matplotlib
pip install html5lib
pip install bs4
Usage:
Load your full match history in the browser (the "Ban Checker for
Steam" extension can do this automatically) and save the page with
Ctrl+S. Put this Python file in the same directory as the saved page,
enter your in-game nickname in the `me` variable at the top, and run
the script (it might take a minute).
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib as mpl
import numpy as np
from collections import Counter, OrderedDict
import pprint
from datetime import datetime
import re
import pickle
import unicodedata
import warnings
###########
## Enter your in-game nick here:
me = "ϟ kристи'"
# Saved HTML page of the match history, expected next to this script.
file_path = "Steam Community __ Counter-Strike_ Global Offensive __ Personal Game Data.html"
# Nicknames with unusual Unicode characters make matplotlib warn about glyphs
# that are missing from the plot font. The pattern below silences that warning
# for every code point instead of a single hard-coded one, and tolerates
# extra text (such as the character name) between the number and "missing".
# warnings.filterwarnings() matches the pattern against the start of the
# message, case-insensitively.
GLYPH_WARNING_PATTERN = r"Glyph \d+ .*missing from"
warnings.filterwarnings("ignore", message=GLYPH_WARNING_PATTERN)
def _clock_to_seconds(clock):
    """Convert an "MM:SS" clock string into a number of seconds."""
    return int(clock[:2]) * 60 + int(clock[3:5])


def _is_full_match(scoreboard):
    """Tell whether a scoreboard belongs to a match that was played out.

    A match is kept when one side reached 16 rounds or when it ended
    15 : 15; surrenders and short matches are rejected. The scores are
    compared as text, so non-numeric score tokens are rejected rather
    than raising.
    """
    tokens = scoreboard.at[5, "Player Name"].split()
    first, second = tokens[0], tokens[2]
    if first == "15" and second == "15":
        return True
    return "16" in (first, second) and "15" not in (first, second)


def generator():
    """Parse the saved match-history page into a list of match dicts.

    Reads ``file_path`` and looks up the module-level nickname ``me`` in
    every scoreboard. The page is expected to yield tables in pairs: an
    info table (map, time, wait time, duration) followed by its scoreboard
    (five players, a score row, five players).

    Returns:
        A list with one dict per kept match, with the keys "world",
        "time" (datetime), "waittime" and "duration" (seconds), "players"
        (the ten names), "outcome" ("won", "loss" or "draw"), "result"
        (always "<higher> : <lower>"), "ping", "kills", "assists",
        "deaths" and "score".

    Raises:
        ValueError: if ``me`` is not among the players of a kept match.
    """
    # Generating list of dataframes. The open file is handed to pandas
    # directly; passing the raw HTML text is deprecated in newer pandas.
    with open(file_path, "r", encoding="utf8") as f:
        frames = pd.read_html(
            f,
            flavor="bs4",
            attrs={"class": ["csgo_scoreboard_inner_left", "csgo_scoreboard_inner_right"]},
        )

    matches = []
    # Walk the tables pairwise and skip unwanted matches instead of deleting
    # list entries by index: a match that was both a scrimmage and a short
    # match used to be queued for deletion twice, which removed unrelated
    # tables as well.
    for info, scoreboard in zip(frames[0::2], frames[1::2]):
        # Remove surrenders, short matches and scrimmage mirage
        if info.at[0, 0] == "Competitive Mirage Scrimmage":
            continue
        if not _is_full_match(scoreboard):
            continue

        match = {}
        # "Competitive <map>" -> "<map>"
        match["world"] = info.at[0, 0].split(' ', 1)[1]
        match["time"] = datetime.strptime(info.at[1, 0], "%Y-%m-%d %H:%M:%S %Z")
        # The third whitespace-separated token of these cells is the clock.
        match["waittime"] = _clock_to_seconds(info.at[3, 0].split()[2])
        match["duration"] = _clock_to_seconds(info.at[4, 0].split()[2])

        ### Playerlist
        # Rows 0-4 hold the first team, row 5 the score, rows 6-10 the
        # second team.
        player_rows = list(range(0, 5)) + list(range(6, 11))
        players = [scoreboard.at[row, "Player Name"] for row in player_rows]
        match["players"] = players

        try:
            my_slot = players.index(me)
        except ValueError:
            raise ValueError(
                "nickname {!r} was not found in the match played at {}; "
                "check the `me` variable".format(me, match["time"])
            ) from None

        ### Outcome & Result
        score = scoreboard.at[5, "Player Name"].split()
        rounds_team1, rounds_team2 = int(score[0]), int(score[2])
        if rounds_team1 == rounds_team2:
            match["outcome"] = "draw"
        elif (my_slot <= 4) == (rounds_team1 > rounds_team2):
            # My team is the one with more rounds.
            match["outcome"] = "won"
        else:
            match["outcome"] = "loss"
        # 16:2 = 2:16 etc. for later analysis: always store the higher score
        # first, in one fixed format.
        match["result"] = "{} : {}".format(
            max(rounds_team1, rounds_team2), min(rounds_team1, rounds_team2)
        )

        ### Personal Stats
        # Players of the second team sit one row lower because of the score row.
        my_row = my_slot + 1 if my_slot > 4 else my_slot
        match["ping"] = int(scoreboard.at[my_row, "Ping"])
        match["kills"] = int(scoreboard.at[my_row, "K"])
        match["assists"] = int(scoreboard.at[my_row, "A"])
        match["deaths"] = int(scoreboard.at[my_row, "D"])
        # TODO: the "★" (MVPs) and "HSP" columns are not read yet; their
        # cells can be NaN (float), which needs handling before stripping.
        match["score"] = int(scoreboard.at[my_row, "Score"])
        matches.append(match)
    return matches
## The following statements need only be run the first time. Afterwards you
## can comment them out and uncomment the pickle.load lines below, which
## skips re-parsing the HTML page on every run.
matches = generator()
# Close the cache file deterministically instead of leaking the handle.
with open("save.p", "wb") as cache_file:
    pickle.dump(matches, cache_file)
## Only load a "save.p" you created yourself: unpickling untrusted data can
## execute arbitrary code.
#with open("save.p", "rb") as cache_file:
#    matches = pickle.load(cache_file)
#pprint.pprint(matches)
def debugging(data=None):
    """Print every table parsed from the saved match-history page.

    Args:
        data: list of DataFrames to print. When omitted, the tables are
            read from ``file_path`` using the same table selector as
            ``generator()``.
    """
    # generator() keeps its parsed tables local, so they have to be passed
    # in or re-read here; a bare module-level `data` does not exist.
    if data is None:
        with open(file_path, "r", encoding="utf8") as f:
            data = pd.read_html(
                f,
                flavor="bs4",
                attrs={"class": ["csgo_scoreboard_inner_left", "csgo_scoreboard_inner_right"]},
            )
    # Show wide tables completely and on a single line per row.
    pd.set_option("display.max_columns", 101)
    pd.set_option('display.expand_frame_repr', False)
    for i, frame in enumerate(data):
        print("####### "+str(i)+" #######")
        print(frame)
        print("\n\n")
#debugging()
def analysing_matchtimes():
    """Scatter-plot the time of day of every match against its date.

    Reads the module-level ``matches`` list. Losses are drawn in red, wins
    in green and everything else (draws) in magenta.
    """
    dates = [mdates.date2num(item["time"]) for item in matches]  # list of all datetimes
    # The fractional part of a date number is the time of day; adding the
    # day of the first match puts all times on one common day.
    base_day = int(dates[0]) if dates else 0

    styles = {"loss": 'ro', "won": 'go'}
    points = {'ro': ([], []), 'go': ([], []), 'mo': ([], [])}
    for item, day in zip(matches, dates):
        xs, ys = points[styles.get(item["outcome"], 'mo')]
        xs.append(day)
        ys.append(day % 1 + base_day)

    fig, ax = plt.subplots()
    # One plot call per outcome instead of one per match; Axes.plot replaces
    # the deprecated Axes.plot_date.
    for style, (xs, ys) in points.items():
        ax.plot(xs, ys, style, markersize=2)
    ax.xaxis_date()
    ax.yaxis_date()
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    fig.autofmt_xdate()
    plt.show()
analysing_matchtimes()
def analysing_matches(StatOfInterest):
    """Plot one per-match statistic over time, with a moving average.

    Reads the module-level ``matches`` list. Points are coloured by outcome
    (red loss, green win, magenta draw) and a vertical line is drawn at
    2020-09-11.

    Args:
        StatOfInterest: key of the match dicts to plot (e.g. "kills",
            "ping"), or "kd" for kills divided by deaths.
    """
    if not matches:
        # The edge-padded moving average below cannot handle an empty series.
        print("No matches to plot for " + StatOfInterest)
        return

    dates = [mdates.date2num(item["time"]) for item in matches]  # list of all datetimes
    if StatOfInterest == "kd":
        # Treat zero deaths as one to avoid dividing by zero.
        values = [item["kills"] / max(item["deaths"], 1) for item in matches]
    else:
        values = [item[StatOfInterest] for item in matches]

    points = {"loss": ([], []), "won": ([], []), "draw": ([], [])}
    for item, day, value in zip(matches, dates, values):
        key = item["outcome"] if item["outcome"] in ("loss", "won") else "draw"
        points[key][0].append(day)
        points[key][1].append(value)

    fig, ax = plt.subplots()
    # One plot call (and one legend entry) per outcome instead of one per match.
    for key, style, suffix in (("loss", 'ro', " of Loss"),
                               ("won", 'go', " of Win"),
                               ("draw", 'mo', " of Draw")):
        xs, ys = points[key]
        if xs:
            ax.plot(xs, ys, style, markersize=2, label=StatOfInterest + suffix)

    # Moving average over N matches; padding with the edge values keeps the
    # smoothed series as long as the input.
    N = 50
    y_padded = np.pad(values, (N//2, N-1-N//2), mode='edge')
    y_smooth = np.convolve(y_padded, np.ones((N,))/N, mode='valid')
    ax.plot(dates, y_smooth, "b-", label="Moving average")
    # Marker date hard-coded by the original author; its meaning is not
    # documented in this file.
    ax.axvline(datetime(2020, 9, 11))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.set_title(StatOfInterest + " of matches throughout the years")
    ax.set_xlabel("Date")
    ax.set_ylabel(StatOfInterest)
    ax.legend()
    fig.autofmt_xdate()
    plt.show()
# Plot each statistic over time, in this order ("assists" is left out: boring).
for stat in ("waittime", "duration", "ping", "kills", "deaths", "kd"):
    analysing_matches(stat)
def analysing_matches_by_daytime(StatOfInterest):
    """Plot one per-match statistic against the time of day.

    Reads the module-level ``matches`` list. Points are coloured by outcome
    (red loss, green win, yellow draw) and a moving average over the
    matches sorted by time of day is drawn on top.

    Args:
        StatOfInterest: key of the match dicts to plot (e.g. "kills",
            "ping"), or "kd" for kills divided by deaths.
    """
    if not matches:
        # The edge-padded moving average below cannot handle an empty series.
        print("No matches to plot for " + StatOfInterest)
        return

    dates = [mdates.date2num(item["time"]) for item in matches]  # list of all datetimes
    # The fractional part of a date number is the time of day; adding the
    # day of the first match puts all times on one common day.
    base_day = int(dates[0])
    times = [day % 1 + base_day for day in dates]
    if StatOfInterest == "kd":
        # Treat zero deaths as one to avoid dividing by zero.
        values = [item["kills"] / max(item["deaths"], 1) for item in matches]
    else:
        values = [item[StatOfInterest] for item in matches]

    points = {"loss": ([], []), "won": ([], []), "draw": ([], [])}
    for item, moment, value in zip(matches, times, values):
        key = item["outcome"] if item["outcome"] in ("loss", "won") else "draw"
        points[key][0].append(moment)
        points[key][1].append(value)

    fig, ax = plt.subplots()
    # One plot call (and one legend entry) per outcome instead of one per match.
    for key, style, suffix in (("loss", 'rx', " of Loss"),
                               ("won", 'gx', " of Win"),
                               ("draw", 'yx', " of Draw")):
        xs, ys = points[key]
        if xs:
            ax.plot(xs, ys, style, markersize=2, label=StatOfInterest + suffix)

    # The moving average must run along the time-of-day axis, so sort first.
    sorted_times, sorted_values = zip(*sorted(zip(times, values)))
    N = 50
    y_padded = np.pad(sorted_values, (N//2, N-1-N//2), mode='edge')
    y_smooth = np.convolve(y_padded, np.ones((N,))/N, mode='valid')
    ax.plot(sorted_times, y_smooth, "b-", label="Moving average")
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H-%M'))
    ax.set_title(StatOfInterest + " of matches throughout time of day")
    ax.set_xlabel("Time of day")
    ax.set_ylabel(StatOfInterest)
    ax.legend()
    fig.autofmt_xdate()
    plt.show()
# Plot each statistic by time of day, in this order ("assists" is left out: boring).
for stat in ("waittime", "duration", "ping", "kills", "deaths", "kd"):
    analysing_matches_by_daytime(stat)
def analysing_players(top_x_players,readout="no"):
    """Show a bar chart of the players who appear most often in ``matches``.

    Every name in each match's "players" list is counted. The most frequent
    name is left out of the chart (presumably that is you, since you are in
    every match); its count is used in the title as the number of matches.

    Args:
        top_x_players: how many of the most frequent names to consider,
            including the skipped first one.
        readout: pass "yes" to also pretty-print the charted ranking.

    Returns:
        The ``top_x_players`` most common ``(name, count)`` pairs,
        including the first one.
    """
    appearances = Counter(
        player for item in matches for player in item["players"]
    )
    ranking = appearances.most_common(top_x_players)
    others = ranking[1:]
    if readout == "yes":
        pprint.pprint(others)
    names = [tag for tag, _ in others]
    counts = [count for _, count in others]

    fig, ax = plt.subplots()
    positions = np.arange(len(counts))
    ax.barh(positions, counts, 0.85)
    ax.set_yticks(positions)
    ax.set_yticklabels(names)
    plt.tight_layout()
    # Write each count next to the end of its bar.
    for row, count in enumerate(counts):
        plt.text(count, row, " "+str(count), color='blue', va='center')
    plt.subplots_adjust(top=0.9, right=0.9)
    plt.title('Number of times a teammate joined your ' +
              str(ranking[0][1]) + " matches")
    plt.show()
    return ranking
analysing_players(25,"yes")
def analysing_players_cumsum(top_x_players=25):
    """Plot, per frequent player, the running number of shared matches.

    Reads the module-level ``matches`` list and calls
    ``analysing_players()`` for the ranking, which also displays that
    function's bar chart.

    Args:
        top_x_players: how many of the most frequent players to look at
            (the first one, presumably you, is skipped).
    """
    if not matches:
        print("No matches to plot")
        return

    latest_datetime = max(item["time"] for item in matches)
    print(latest_datetime)
    # Fetch the ranking before creating this figure: analysing_players()
    # opens and shows a figure of its own, and plt.show() displays every
    # open figure, so a figure created earlier would pop up empty there.
    most_common_players = analysing_players(top_x_players)

    fig, ax = plt.subplots()
    for name, _ in most_common_players[1:]:  # skip "me"
        # Walk the matches in reverse so the times run in the order the
        # running total needs (presumably the page lists newest first).
        joined = [item["time"] for item in reversed(matches)
                  if name in item["players"]]
        # One step up per shared match, then a flat segment to the end of
        # the diagram.
        steps = [1] * len(joined) + [0]
        ax.plot(joined + [latest_datetime], np.cumsum(steps), '-', label=name)
    ax.legend()
    # Address the axes directly; plt.title would hit whichever figure is
    # current, which is not guaranteed to be this one.
    ax.set_title('Number of times a mate joined you for a match over time')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    fig.autofmt_xdate()
    plt.show()
analysing_players_cumsum()
def analysing_results():
    """Show a bar chart of how often each final score occurred.

    Reads the "result" of every entry in the module-level ``matches`` list,
    pretty-prints the tally and plots it ordered from '16 : 0' up to
    '16 : 14', with '15 : 15' last. A result outside that set raises
    ``KeyError``.
    """
    tally = Counter(item["result"] for item in matches)
    # To check a specific map only, count just the matches whose "world"
    # equals that map (e.g. "Nuke") instead.
    ranked = tally.most_common()

    # Position of every expected result on the chart.
    position_of = {"16 : " + str(lost): lost for lost in range(15)}
    position_of["15 : 15"] = 15
    ranked.sort(key=lambda entry: position_of[entry[0]])
    pprint.pprint(ranked)

    labels = [tag for tag, _ in ranked]
    counts = [count for _, count in ranked]
    fig, ax = plt.subplots()
    positions = np.arange(len(counts))
    ax.barh(positions, counts, 0.85)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)
    plt.tight_layout()
    # Write each count next to the end of its bar.
    for row, count in enumerate(counts):
        plt.text(count, row, " "+str(count), color='blue', va='center')
    plt.subplots_adjust(top=0.9, right=0.9)
    plt.title('Number of times a specific result has occured')
    plt.show()
analysing_results()
def analysing_maps():
    """Show a bar chart of how often each map was played.

    Reads the "world" of every entry in the module-level ``matches`` list,
    pretty-prints the tally (most played first) and plots it in that order.
    """
    tally = Counter(item["world"] for item in matches)
    ranked = tally.most_common()
    pprint.pprint(ranked)

    labels = [tag for tag, _ in ranked]
    counts = [count for _, count in ranked]
    fig, ax = plt.subplots()
    positions = np.arange(len(counts))
    ax.barh(positions, counts, 0.85)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)
    plt.tight_layout()
    # Write each count next to the end of its bar.
    for row, count in enumerate(counts):
        plt.text(count, row, " "+str(count), color='blue', va='center')
    plt.subplots_adjust(top=0.9, right=0.9)
    plt.title('Number of times a map has been played')
    plt.show()
analysing_maps()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment