Skip to content

Instantly share code, notes, and snippets.

@aguestuser
Created April 26, 2015 23:44
Show Gist options
  • Save aguestuser/854245d7221545a0e9ae to your computer and use it in GitHub Desktop.
Save aguestuser/854245d7221545a0e9ae to your computer and use it in GitHub Desktop.
Matthew's Script for Calculating Centrality among LS entities
import sys
import MySQLdb as my
import networkx as nx
import csv
from IPython import embed
db = my.connect(host='localhost', user='littlesis', passwd='midm681_beet', db='littlesis')
cur = db.cursor(my.cursors.DictCursor)
# all positions
# sql = "SELECT l.entity1_id, l.entity2_id, l.category_id, r.is_current, e1.name AS name1, e2.name AS name2 FROM link l LEFT JOIN relationship r ON (r.id = l.relationship_id) LEFT JOIN entity e1 on (e1.id = l.entity1_id) LEFT JOIN entity e2 ON (e2.id = l.entity2_id) WHERE l.category_id = 1 AND l.is_reverse = 0"
# all board memberships
sql = "SELECT l.entity1_id, l.entity2_id, l.category_id, r.is_current, e1.name AS name1, e2.name AS name2, e1.primary_ext AS primary_ext1, e2.primary_ext AS primary_ext2 FROM link l LEFT JOIN relationship r ON (r.id = l.relationship_id) LEFT JOIN position p ON (p.relationship_id = l.relationship_id) LEFT JOIN entity e1 on (e1.id = l.entity1_id) LEFT JOIN entity e2 ON (e2.id = l.entity2_id) WHERE l.category_id = 1 AND l.is_reverse = 0 AND p.is_board = 1"
# all positions of people with positions in fortune 1000 companies
# first get fortune 1000 companies
# sql = "SELECT DISTINCT(le.entity_id) FROM ls_list_entity le WHERE le.list_id = 110 AND le.is_deleted = 0"
# cur.execute(sql)
# company_ids = [str(row['entity_id']) for row in cur.fetchall()]
# # then get all people with positions in fortune 1000 companies
# sql = "SELECT DISTINCT(r.entity1_id) FROM relationship r WHERE r.entity2_id IN (" + ",".join(company_ids) + ") AND r.category_id = 1 AND r.is_deleted = 0"
# cur.execute(sql)
# entity_ids = [str(row['entity1_id']) for row in cur.fetchall()]
# # finally get all positions of those people
# sql = "SELECT l.entity1_id, l.entity2_id, l.category_id, r.is_current, e1.name AS name1, e2.name AS name2 FROM link l LEFT JOIN relationship r ON (r.id = l.relationship_id) LEFT JOIN entity e1 on (e1.id = l.entity1_id) LEFT JOIN entity e2 ON (e2.id = l.entity2_id) WHERE l.entity1_id IN (" + ",".join(entity_ids) + ") AND l.category_id = 1 AND l.is_reverse = 0 AND r.is_deleted = 0"
cur.execute(sql)
links = cur.fetchall()
names = {}
exts = {}
g = nx.Graph()
# category_weights = {
# '1': 1,
# '2': 0.8,
# '3': 0.5,
# '4': 1,
# '5': 0.8,
# '6': 1,
# '7': 1`
# }
for i in range(len(links)):
link = links[i]
id1 = str(link['entity1_id'])
id2 = str(link['entity2_id'])
name1 = str(link['name1'])
name2 = str(link['name2'])
names[id1] = name1
names[id2] = name2
ext1 = str(link['primary_ext1'])
ext2 = str(link['primary_ext2'])
exts[id1] = ext1
exts[id2] = ext2
g.add_node(id1, name=name1)
g.add_node(id2, name=name2)
weight = (1 if link['is_current'] == '1' else 0.5)
g.add_edge(id1, id2, weight=weight)
nodes = nx.betweenness_centrality(g, weight='weight')
ids = sorted(nodes, key=nodes.get, reverse=True)
with open(sys.argv[1], 'wb') as csvfile:
writer = csv.writer(csvfile)
for id in ids:
writer.writerow([id, names[id], exts[id], nodes[id]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment