# Learning on League of Legends Data
### Written by Brian Perrett for funsies
"from __future__ import division\n",
"import numpy as np\n",
"import csv\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn.svm import SVC\n",
"import ast"
{'133': 'Quinn', '131': 'Diana', '134': 'Syndra', '24': 'Jax', '25': 'Morgana', '26': 'Zilean', '27': 'Singed', '20': 'Nunu', '21': 'Miss Fortune', '22': 'Ashe', '23': 'Tryndamere', '28': 'Evelynn', '29': 'Twitch', '161': "Vel'Koz", '4': 'Twisted Fate', '8': 'Vladimir', '120': 'Hecarim', '121': "Kha'Zix", '122': 'Darius', '267': 'Nami', '266': 'Aatrox', '126': 'Jayce', '127': 'Lissandra', '268': 'Azir', '59': 'Jarvan IV', '58': 'Renekton', '55': 'Katarina', '54': 'Malphite', '57': 'Maokai', '56': 'Nocturne', '51': 'Caitlyn', '50': 'Swain', '53': 'Blitzcrank', '412': 'Thresh', '115': 'Ziggs', '114': 'Fiora', '117': 'Lulu', '89': 'Leona', '111': 'Nautilus', '110': 'Varus', '113': 'Sejuani', '112': 'Viktor', '82': 'Mordekaiser', '83': 'Yorick', '80': 'Pantheon', '81': 'Ezreal', '119': 'Draven', '84': 'Akali', '85': 'Kennen', '429': 'Kalista', '3': 'Galio', '7': 'LeBlanc', '421': "Rek'Sai", '420': 'Illaoi', '245': 'Ekko', '102': 'Shyvana', '103': 'Ahri', '101': 'Xerath', '106': 'Volibear', '107': 'Rengar', '104': 'Graves', '105': 'Fizz', '39': 'Irelia', '38': 'Kassadin', '33': 'Rammus', '32': 'Amumu', '31': "Cho'Gath", '30': 'Karthus', '37': 'Sona', '36': 'Dr. 
Mundo', '35': 'Shaco', '34': 'Anivia', '432': 'Bard', '60': 'Elise', '61': 'Orianna', '62': 'Wukong', '63': 'Brand', '64': 'Lee Sin', '67': 'Vayne', '68': 'Rumble', '69': 'Cassiopeia', '254': 'Vi', '2': 'Olaf', '6': 'Urgot', '99': 'Lux', '98': 'Shen', '91': 'Talon', '90': 'Malzahar', '92': 'Riven', '223': 'Tahm Kench', '222': 'Jinx', '96': "Kog'Maw", '11': 'Master Yi', '10': 'Kayle', '13': 'Ryze', '12': 'Alistar', '15': 'Sivir', '14': 'Sion', '17': 'Teemo', '16': 'Soraka', '19': 'Warwick', '18': 'Tristana', '150': 'Gnar', '154': 'Zac', '157': 'Yasuo', '238': 'Zed', '236': 'Lucian', '48': 'Trundle', '86': 'Garen', '44': 'Taric', '45': 'Veigar', '42': 'Corki', '43': 'Karma', '40': 'Janna', '41': 'Gangplank', '1': 'Annie', '5': 'Xin Zhao', '9': 'Fiddlesticks', '201': 'Braum', '203': 'Kindred', '202': 'Jhin', '143': 'Zyra', '77': 'Udyr', '76': 'Nidalee', '75': 'Nasus', '74': 'Heimerdinger', '72': 'Skarner', '79': 'Gragas', '78': 'Poppy'}
"with open(\"F:/Box Sync/Documents/Python Documents/riot/champids.txt\", \"r\") as f:\n",
" champion_dict = ast.literal_eval(\n",
"global champion_dict"
def getDataNoInGame(filename):
    """
    Load a match CSV while leaving out the last 14 columns of every row.

    The first row read is treated as the header. Column 0 is skipped, the
    feature columns are ``row[1:-14]`` and the label is the second-to-last
    column (``row[-2]``). Blank rows are ignored.
    NOTE(review): the dropped trailing columns are presumably the in-game
    objective features (first baron, first inhibitor, ...) -- confirm
    against the CSV layout.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        (features, x, y) as numpy arrays: the header names of the kept
        feature columns, one row of ints per match, and one int label per
        match.

    Raises:
        ValueError: if a feature or label field is not an integer.
        IndexError: if a non-blank row has fewer than two columns.
    """
    x = []
    y = []
    with open(filename, "r") as f:
        reader = csv.reader(f)
        # The first row is the header; an empty file yields no features.
        header = next(reader, None)
        features = header[1:-14] if header is not None else []
        for row in reader:
            if not row:
                continue
            x.append([int(value) for value in row[1:-14]])
            # Parse the whole field. Mapping int over the string's characters
            # kept only the first digit of the label.
            y.append(int(row[-2]))
    return np.array(features), np.array(x), np.array(y)
def getData(filename):
    """
    Load a match CSV with every feature column, including data like first
    baron and first inhibitor.

    The first row read is treated as the header. Column 0 is skipped, the
    feature columns are ``row[1:-2]`` and the label is the second-to-last
    column (``row[-2]``). Blank rows are ignored.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        (features, x, y) as numpy arrays: the header names of the feature
        columns, one row of ints per match, and one int label per match.

    Raises:
        ValueError: if a feature or label field is not an integer.
        IndexError: if a non-blank row has fewer than two columns.
    """
    x = []
    y = []
    with open(filename, "r") as f:
        reader = csv.reader(f)
        # The first row is the header; an empty file yields no features.
        header = next(reader, None)
        features = header[1:-2] if header is not None else []
        for row in reader:
            if not row:
                continue
            x.append([int(value) for value in row[1:-2]])
            # Parse the whole field. Mapping int over the string's characters
            # kept only the first digit of the label.
            y.append(int(row[-2]))
    return np.array(features), np.array(x), np.array(y)
"def testCorrect(trainsize, x, y):\n",
" correct = 0\n",
" for i, x1 in enumerate(x[trainsize:]):\n",
" ans = clf.predict([x1])\n",
" # print(ans[0], int(y[trainsize + i]))\n",
" if int(ans[0]) == int(y[trainsize + i]):\n",
" correct += 1\n",
" print(\"{} of {}\".format(correct, len(x[trainsize:])))\n",
" print(correct/len(x[trainsize:]))"
def getTopFeatures(clf, n):
    """
    Print the n features with the largest coefficients, biggest first.

    Reads the notebook-level ``features`` (names aligned with
    ``clf.coef_[0]``) and ``champion_dict`` (champion id string -> name).
    A feature name containing "champ" has its id replaced by the champion's
    name; if the id is not in ``champion_dict`` the raw name is printed.

    Parameters:
        clf: fitted linear classifier exposing ``coef_``.
        n: number of features to print; zero or a negative value prints
            nothing.
    """
    weights = clf.coef_[0]
    # Honour n (the slice used to be hard-coded to 10). Clamp so that a
    # negative n cannot turn into a from-the-end slice.
    indeces = weights.argsort()[::-1][:max(n, 0)]
    for ind in indeces:
        f = features[ind]
        if "champ" in f:
            champ = f.split("champ")
            champ_name = champion_dict.get(champ[1])
            if champ_name is not None:
                f = "{}{}".format(champ[0], champ_name)
        print(weights[ind], f)
def getWorstFeatures(clf, n):
    """
    Print the n features with the smallest coefficients, lowest first.

    Reads the notebook-level ``features`` (names aligned with
    ``clf.coef_[0]``) and ``champion_dict`` (champion id string -> name).
    A feature name containing "champ" has its id replaced by the champion's
    name; if the id is not in ``champion_dict`` the raw name is printed.

    Parameters:
        clf: fitted linear classifier exposing ``coef_``.
        n: number of features to print; zero or a negative value prints
            nothing.
    """
    weights = clf.coef_[0]
    # Honour n (the slice used to be hard-coded to 10). Clamp so that a
    # negative n cannot turn into a from-the-end slice.
    indeces = weights.argsort()[:max(n, 0)]
    for ind in indeces:
        f = features[ind]
        if "champ" in f:
            champ = f.split("champ")
            champ_name = champion_dict.get(champ[1])
            if champ_name is not None:
                f = "{}{}".format(champ[0], champ_name)
        print(weights[ind], f)
### Diamond
# Stochastic Gradient Descent
# Load the Diamond dataset with every feature column, then reserve the first
# 60% of the rows for training; the remainder is the held-out test slice.
features, x, y = getData("diamond.csv")
trainsize = int(.6 * len(x))
"trainsize = int(.6 * len(x))"
"execution_count": 10,
# Linear classifier with logistic loss and an L2 penalty, fit on the training
# slice only so that rows from `trainsize` onward stay unseen for testCorrect.
# NOTE(review): `n_iter` and loss="log" are spelled differently in newer
# scikit-learn releases -- confirm against the installed version.
clf = SGDClassifier(loss="log", penalty="l2", n_iter=50)
clf.fit(x[:trainsize], y[:trainsize])
"[:trainsize], y[:trainsize])"
"1411 of 1620\n",
testCorrect(trainsize, x, y)
"<h4>Reading the top features list</h4>\n",
" <li>t100 = Team 1</li>\n",
" <li>t200 = Team 2</li>\n",
" <li>Basically, if you see a t200 on the topfeatures list, it means you want the other team to have that feature</li>\n",
"(4.7953455844123329, 't100firstinhibitor')\n",
"(2.8398065867578812, 't200:p10:Tryndamere')\n",
"(2.7132403942563212, 't100:p1:Akali')\n",
"(2.6592542376319486, 't200:p8:Shaco')\n",
"(2.6454619154157348, \"t200:p10:Vel'Koz\")\n",
"(2.5592554082180508, 't100:p2:Vayne')\n",
"(2.5542158265325039, 't200:p6:Braum')\n",
"(2.4720206064685226, 't200:p10:Veigar')\n",
"(2.4410791155160378, 't100firstbaron')\n",
"(2.4019936685062784, 't100:p2:Fizz')\n"
getTopFeatures(clf, 10)
The SGD algorithm put the largest weight on who took first tower, which makes a lot of sense, but I'm surprised it isn't something else, like first baron, or inhibitor.
Correction: it actually is first inhibitor. I had an off-by-one error in my indexing, and everything makes sense now.
# Support Vector Machine
# Fit on the training slice only, as the SGD cell does. Fitting on all of
# x, y lets the model see the rows that testCorrect scores afterwards, which
# inflates the reported accuracy.
clf = SVC(kernel="linear")
clf.fit(x[:trainsize], y[:trainsize])
", y)"
"1582 of 1620\n",
testCorrect(trainsize, x, y)
The SVM was way slower, but with almost perfect predictions
"(1.9221614242489031, 't100firstinhibitor')\n",
"(1.4134330937206883, 't200:p8:Shaco')\n",
"(1.3393736042535727, 't200:p6:Xerath')\n",
"(1.2869091698333024, 't100:p4:Nami')\n",
"(1.2636548312007148, 't100:p2:Sejuani')\n",
"(1.2519879847705975, 't100:p1:Akali')\n",
"(1.2457964324805502, 't100:p2:Vayne')\n",
"(1.2235508451634636, 't200:p9:Karthus')\n",
"(1.1686349919970627, 't100:p1:Taric')\n",
"(1.1549587773247338, 't200:p8:Dr. Mundo')\n"
"getTopFeatures(clf, 10)"
"Basically, it looks like having a Shaco on the other team is a good thing for you. Both algorithms agreed on that."
"### Bronze"
"# SGD"
"features, x, y = getData(\"bronze.csv\")\n",
"trainsize = int(.6 * len(x))"
"clf = SGDClassifier(loss=\"log\", penalty=\"l2\", n_iter=50)\n",
"[:trainsize], y[:trainsize])"
"811 of 958\n",
"testCorrect(trainsize, x, y)"
"(4.0780341544599672, 't100firstinhibitor')\n",
"(2.5687310089850075, 't100:p3:Lux')\n",
"(2.5534177132363873, 't200:p6:Teemo')\n",
"(2.5102583295827352, 't100:p4:Yasuo')\n",
"(2.4331208700436329, 't100:p3:Jax')\n",
"(2.4205954521501969, 't100:p3:Talon')\n",
"(2.4112080514270793, 't200:p10:Hecarim')\n",
"(2.374823489169323, 't200:p6:Annie')\n",
"(2.2954659333231957, 't200:p7:Tryndamere')\n",
"(2.2550787553853411, 't200:p9:Blitzcrank')\n"
"getTopFeatures(clf, 10)"
"# SVM"
"clf = SVC(kernel=\"linear\")\n",
", y)"
"944 of 958\n",
"testCorrect(trainsize, x, y)"
"(1.5563313625313706, 't200:p10:Hecarim')\n",
"(1.3725625528065544, 't100firstinhibitor')\n",
"(1.3466930431097506, 't100:p4:Nasus')\n",
"(1.2287842619682321, 't100:p5:Draven')\n",
"(1.2132124630750809, 't200:p10:Nidalee')\n",
"(1.2053695635800641, 't200:p7:Volibear')\n",
"(1.1892381567730586, 't200:p6:Talon')\n",
"(1.161076445045945, 't200:p6:Gnar')\n",
"(1.1101318204266222, 't100:p5:Sion')\n",
"(1.0891531280511229, 't100:p1:Annie')\n"
"getTopFeatures(clf, 10)"
"(-1.6203523849847805, 't200:p10:Illaoi')\n",
"(-1.4782681058126228, 't200firstinhibitor')\n",
"(-1.3142544008570698, \"t200:p6:Kog'Maw\")\n",
"(-1.2448348290728046, 't200:p7:Azir')\n",
"(-1.2402458430467633, 't200:p9:Twisted Fate')\n",
"(-1.1827097915564944, 't200:p8:Karthus')\n",
"(-1.1575872702935666, 't100:p1:Zed')\n",
"(-1.1551235540340952, 't200:p8:Sejuani')\n",
"(-1.0832806158864661, 't100:p1:Nunu')\n",
"(-1.0397595238540016, 't100:p1:Xerath')\n"
"getWorstFeatures(clf, 10)"
"Apparently Hecarim is not good on this patch or something."
"### Challenger"
"features, x, y = getData(\"challenger.csv\")\n",
"trainsize = int(.8 * len(x))"
"clf = SGDClassifier(loss=\"log\", penalty=\"l2\", n_iter=50)\n",
"[:trainsize], y[:trainsize])"
"62 of 71\n",
"testCorrect(trainsize, x, y)"
"(9.2586255948382767, 't100firstinhibitor')\n",
"(3.7565552243529998, 't100firstbaron')\n",
"(3.3940741582075091, 't200:p6:Lucian')\n",
"(3.3369911715648146, 't200:p9:Riven')\n",
"(3.3343600665079878, 't100:p4:Wukong')\n",
"(3.3325219357224443, 't100:p5:Vayne')\n",
"(3.1399380092940716, 't200:p10:spell112')\n",
"(2.9670288920162071, 't100:p4:Janna')\n",
"(2.7963696663082223, 't200:p8:Ezreal')\n",
"(2.7907344689232145, 't200:p10:Corki')\n"
"getTopFeatures(clf, 10)"
"(-10.689015417418142, 't200firstinhibitor')\n",
"(-6.1745348150526489, 't200firstbaron')\n",
"(-3.7201689568111775, 't200:p10:spell114')\n",
"(-3.3219388866008033, 't100:p3:Gragas')\n",
"(-2.729922620612883, 't100:p2:spell13')\n",
"(-2.6749337303808884, 't100:p1:Viktor')\n",
"(-2.6670562367840533, \"t100:p1:Rek'Sai\")\n",
"(-2.5987716570977666, 't100:p4:Bard')\n",
"(-2.4199281798587533, 't200:p7:spell17')\n",
"(-2.294666173835473, 't100:p3:spell112')\n"
"getWorstFeatures(clf, 10)"
"This may just be that my challenger dataset is relatively smaller than the others, but it is interesting to see the large weight on inhibitor and baron. \n",
"It is much larger than it was in Bronze, which would imply that getting first baron or inhib in Challenger games is much more indicative of winning."
"# SVM"
"clf = SVC(kernel=\"linear\")\n",
", y)"
"71 of 71\n",
"testCorrect(trainsize, x, y)"
"(0.84376448830477058, 't100firstinhibitor')\n",
"(0.39022169412339591, 't200:p10:Corki')\n",
"(0.35319143837353273, 't200:p9:Lee Sin')\n",
"(0.34980695636668141, 't100:p4:Wukong')\n",
"(0.34157096431957373, 't200:p10:Ekko')\n",
"(0.33817091259529097, 't200:p7:spell11')\n",
"(0.33729562183535411, 't100firstbaron')\n",
"(0.30714224877813301, 't200:p7:Ahri')\n",
"(0.2891027284345008, 't200:p7:spell121')\n",
"(0.28865677196628964, 't100firstdragon')\n"
"getTopFeatures(clf, 10)"
"# What if we didn't know about barons and inhibs?"
"### Diamond"
"#### SGD"
"features, x, y = getDataNoInGame(\"diamond.csv\")\n",
"trainsize = int(.6 * len(x))"
"clf = SGDClassifier(loss=\"log\", penalty=\"l2\", n_iter=50)\n",
"[:trainsize], y[:trainsize])"
"844 of 1620\n",
"testCorrect(trainsize, x, y)"
"SGD has a much harder time discerning who wins, which means that the champions are more or less balanced.\n",
"It classified only 52% of them correctly. Basically a coin flip."
"#### SVM"
"clf = SVC(kernel=\"linear\")\n",
", y)"
"1270 of 1620\n",
"testCorrect(trainsize, x, y)"
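"Wow, almost 80% correct on the test set? Class imbalance is one possibility, but note that this SVM appears to have been fit on all of x and y, test rows included, so the score likely reflects rows the model has already seen rather than real predictive power."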
"(1.8183912726840334, 't200:p9:Rumble')\n",
"(1.6730483627122275, 't200:p6:Yorick')\n",
"(1.6678794419393672, 't100:p1:Akali')\n",
"(1.6447259451347747, 't100:p1:Taric')\n",
"(1.6005601262474496, 't200:p6:Teemo')\n",
"(1.5721088539867876, 't200:p9:Zac')\n",
"(1.4964099580946537, 't200:p8:Twitch')\n",
"(1.4839422463911134, 't200:p9:Heimerdinger')\n",
"(1.4432712419660707, 't200:p8:Sejuani')\n",
"(1.4190523825849422, 't100:p3:Illaoi')\n"
"getTopFeatures(clf, 10)"
"This suggests that Rumble and Yorick are not good picks right now, whereas Akali is."
