Skip to content

Instantly share code, notes, and snippets.

@jdmoore7
Created May 30, 2020 16:39
Show Gist options
  • Save jdmoore7/5da71871e5a408c654a4915dc10b7195 to your computer and use it in GitHub Desktop.
Save jdmoore7/5da71871e5a408c654a4915dc10b7195 to your computer and use it in GitHub Desktop.
NCAA_page_rank_gist
# SQL
import sqlite3

import pandas as pd

# Load every joined games/box_scores row from the SQLite file into a DataFrame.
conn = sqlite3.connect('acc1819.db')
c = conn.cursor()

# Get columns from game table (field 1 of each PRAGMA table_info row is the column name)
game_columns = c.execute("""
PRAGMA table_info(games);
""").fetchall()
g_col = [column[1] for column in game_columns]

# Get columns from box table
box_columns = c.execute("""
PRAGMA table_info(box_scores);
""").fetchall()
b_col = [column[1] for column in box_columns]

# Join tables on GameId
join_tuples = c.execute("""
SELECT *
FROM games g
INNER JOIN
box_scores b
ON g.GameId = b.GameId
""").fetchall()

# All rows are in memory now, so release the database handle instead of
# holding it open for the rest of the script.
conn.close()

# SELECT * yields the games columns followed by the box_scores columns.
join_col = g_col + b_col

# Create Dataframe with appropriate column names
game_frame = pd.DataFrame(join_tuples, columns=join_col)

# Keep only the first occurrence of any repeated column name (GameId occurs
# in both tables). Copy so that df is an independent frame rather than a
# slice of game_frame, which makes later column assignment on df safe.
df = game_frame.loc[:, ~game_frame.columns.duplicated()].copy()
# For each GameId, compute every row's score margin over the other row of the same game
import numpy as np

score_by_game = df.groupby('GameId')['Score']
# diff() is (this row - previous row within the game) and is NaN on a game's
# first row; those gaps are filled with diff(-1), i.e. (this row - next row
# within the game). Working inside each group means a game with a single row
# simply stays NaN instead of raising IndexError or borrowing a value from an
# unrelated neighbouring row.
scores = score_by_game.diff().fillna(score_by_game.diff(-1)).to_list()

# assign() returns a new frame with 'delta' appended as the last column, so
# no column is ever written onto a slice of another frame.
df = df.assign(delta=scores)

# Keep only the rows whose NeutralSite flag is 0.
regular_season = df[df['NeutralSite'] == 0]
# Make sure we build edges between teams where the delta is pointing the RIGHT direction
# Positional offsets of the fields read from each joined row.
# NOTE(review): these offsets depend on the column order of the joined frame — confirm
# against the database schema before changing either table.
AWAY_POS = 3
HOME_POS = 4
RELATIVE_TEAM_POS = 5
DELTA_POS = 22

edges = []
for idx, row in regular_season.iterrows():
    # Rows with an odd index label are skipped so each game yields one edge.
    # NOTE(review): assumes a game's two rows sit at consecutive even/odd labels — confirm.
    if idx % 2 == 1:
        continue

    # .iloc makes the positional lookup explicit; plain row[3] on a
    # label-indexed Series is deprecated positional access in pandas 2.x.
    away = row.iloc[AWAY_POS]
    home = row.iloc[HOME_POS]
    relative_team = row.iloc[RELATIVE_TEAM_POS]
    delta = row.iloc[DELTA_POS]

    # delta is relative to this row's team; flip it when that team is the
    # away side so that a positive delta always means the home team won.
    if relative_team == away:
        delta = -delta

    if delta > 0:
        winner = home
        loser = away
        points = delta
    elif delta < 0:
        winner = away
        loser = home
        points = -delta
    else:
        # Zero margin (or NaN, which fails both comparisons): no edge.
        continue

    edges.append((loser, winner, points))  # creating a network of who has beaten who.
import igraph
import operator

## This design allows points to travel FROM the losing team TO the winning team.
# Each edge tuple is (loser, winner, points); weights=True stores the third
# item in the "weight" edge attribute.
game_graph = igraph.Graph.TupleList(edges, weights=True, directed=True)

# pagerank() ignores edge attributes unless told which one to use, so the
# point margins only influence the ranking when weights is passed explicitly.
vectors = game_graph.pagerank(weights='weight')

# Map each team name to its PageRank score (pagerank() returns the scores in
# vertex order, matching game_graph.vs['name']).
e = dict(zip(game_graph.vs['name'], vectors))
sorted_eigen = sorted(e.items(), key=operator.itemgetter(1), reverse=True)

# (zero-based position, team, score), best team first.
rankings = [(i, team, score) for i, (team, score) in enumerate(sorted_eigen)]
import csv

# Write the ranking table to ExampleRankings.csv, one row per team.
# Each rankings entry is (zero-based position, team, score); the position is
# shifted by one so the file starts at rank 1.
fieldnames = ['ranking','team','eigenvector centrality']
with open('ExampleRankings.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(
        dict(zip(fieldnames, (position + 1, team, score)))
        for position, team, score in rankings
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment