Created
May 30, 2020 16:39
-
-
Save jdmoore7/5da71871e5a408c654a4915dc10b7195 to your computer and use it in GitHub Desktop.
NCAA_page_rank_gist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SQL
import sqlite3

# Pull every game/box-score row out of the season database. The connection is
# closed as soon as the rows are fetched; nothing below queries SQLite again,
# so `conn` and `c` remain bound but are no longer usable after this section.
conn = sqlite3.connect('acc1819.db')
try:
    c = conn.cursor()

    # Get columns from game table (field 1 of each table_info row is the column name)
    game_columns = c.execute("""
        PRAGMA table_info(games);
    """).fetchall()
    g_col = [column[1] for column in game_columns]

    # Get columns from box table
    box_columns = c.execute("""
        PRAGMA table_info(box_scores);
    """).fetchall()
    b_col = [column[1] for column in box_columns]

    # Join tables on GameId
    join_tuples = c.execute("""
        SELECT *
        FROM games g
        INNER JOIN
        box_scores b
        ON g.GameId = b.GameId
    """).fetchall()
finally:
    conn.close()

# SELECT * over the join returns the games columns followed by the box_scores
# columns, so the header is the two name lists concatenated in that order.
join_col = g_col + b_col

# Create Dataframe with appropriate column names
import pandas as pd

game_frame = pd.DataFrame(join_tuples, columns=join_col)
# Keep only the first occurrence of any repeated column name (GameId occurs
# twice because it is present in both joined tables).
df = game_frame.loc[:, ~game_frame.columns.duplicated()]
# For each GameId, compute every row's score margin relative to the other row
# of the same game and store it in a new 'delta' column.
import numpy as np

score_diff = df.groupby('GameId')['Score'].diff()
# diff() leaves NaN on the first row of each game. That row's margin is the
# negation of the following row's (second - first) difference, so fill it from
# the next row. A trailing NaN with no following row simply stays NaN instead
# of raising IndexError.
# NOTE(review): this assumes the two rows of a game are adjacent - confirm.
df = df.assign(delta=score_diff.fillna(-score_diff.shift(-1)))

# Only games with NeutralSite == 0 are kept for the rest of the analysis.
regular_season = df[df['NeutralSite'] == 0]
# Make sure we build edges between teams where the delta is pointing the RIGHT direction.
# Each edge is (loser, winner, points), where points is the positive margin.
edges = []
for idx, row in regular_season.iterrows():
    # Only rows with an even index label are used, one per game.
    # NOTE(review): presumably each game occupies two adjacent rows starting
    # at an even label - confirm against the joined data.
    if idx % 2 == 1:
        continue

    # Columns are read by position; .iloc makes that explicit because plain
    # row[n] on a label-indexed Series is a deprecated positional fallback.
    away = row.iloc[3]
    home = row.iloc[4]
    relative_team = row.iloc[5]
    delta = row.iloc[22]  # presumably the 'delta' column - TODO confirm position

    # Re-express the margin from the home side when this row's team is the away team.
    if relative_team == away:
        delta = -delta

    if delta > 0:
        winner = home
        loser = away
        points = delta
    elif delta < 0:
        winner = away
        loser = home
        points = -delta
    else:
        # Zero (or NaN) margin: no edge is recorded for this game.
        continue
    edges.append((loser, winner, points))  # creating a network of who has beaten who.
import igraph

# Directed graph of results: every edge runs loser -> winner, and weights=True
# stores the third tuple element (the margin) as the "weight" edge attribute.
game_graph = igraph.Graph.TupleList(edges, weights=True, directed=True)
## This design allows points to travel FROM the losing team TO the winning team.
import operator

# Hand the stored margins to PageRank. Without weights= every edge counts
# equally and the margins attached above would have no effect on the scores.
vectors = game_graph.pagerank(weights='weight')

# Map team name -> PageRank score, then order teams from highest to lowest score.
e = dict(zip(game_graph.vs['name'], vectors))
sorted_eigen = sorted(e.items(), key=operator.itemgetter(1), reverse=True)

# (zero-based position, team name, score) triples in ranked order.
rankings = [(i, team, score) for i, (team, score) in enumerate(sorted_eigen)]
import csv

# Write the ranked teams to ExampleRankings.csv: a header row, then one row per
# entry of `rankings`, with the zero-based position shifted to a 1-based rank.
with open('ExampleRankings.csv', 'w', newline='') as csvfile:
    fieldnames = ['ranking', 'team', 'eigenvector centrality']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(
        dict(zip(fieldnames, (position + 1, team, score)))
        for position, team, score in rankings
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment