Skip to content

Instantly share code, notes, and snippets.

@AlphaSheep
Last active March 19, 2017 09:03
Show Gist options
  • Save AlphaSheep/7102b2dfaa5030465fcfbc0279a538f4 to your computer and use it in GitHub Desktop.
Save AlphaSheep/7102b2dfaa5030465fcfbc0279a538f4 to your computer and use it in GitHub Desktop.
Cubing degrees of separation
#!/usr/bin/python3
import json
import time
def tic():
tic.start = time.time()
def toc():
print('Time elapsed:', round(time.time() - tic.start, 3),'s\n')
return (time.time() - tic.start)
def save(var, varName):
with open(varName+'.json','w') as saveFile:
json.dump(var, saveFile)
def load(varName):
with open(varName+'.json') as loadFile:
result = json.load(loadFile)
return result
tic()
print('Loading Distance Matrix')
compsList = load('compsList')
degrees = load('degrees')
toc()
for x in range(6):
compCount = 0
for i in range(len(degrees)):
for j in range(len(degrees[i])):
if degrees[i][j] == x:
compCount += 1
print('Distance: {:.0f} Number of competition combinations: {:8.0f}, ({:6.2f} %)'.format(x, compCount, compCount/(len(compsList)*(len(compsList)-1))*100))
toc()
maxDegree = max([max(d) for d in degrees])
print('Maximum degree of separation: {}'.format(maxDegree))
print('Searching for longest chains')
longest = 5
chainIndexes = []
for i in range(len(degrees)-1):
for j in range(i+1, len(degrees[i])):
# for i in range(len(degrees)):
# for j in range(len(degrees[i])):
if degrees[i][j] >= longest:
chainIndexes.append((i,j))
print('Distance:', degrees[i][j], '\t', i, compsList[i], '-->', compsList[j], j)
if degrees[i][j] == 0 and not i==j:
print(degrees[i][j], i, compsList[i], '-->', compsList[j], j, sep='\t')
save(chainIndexes, 'chainIndexes')
toc()
print('Longest Chains')
chains = []
for i,j in chainIndexes:
#for i,j in [(2936,2937)]:
thisChain = [] #[[] for _ in range(maxDegree+1)]
for dist in range(maxDegree+1):
for link in range(len(degrees)):
if degrees[i][link] == dist and degrees[j][link] == maxDegree-dist:
thisChain.append(link)
break
chains.append(thisChain)
print(compsList[i], '-->', compsList[j])
print(' ('+' -- '.join([compsList[link] for link in thisChain])+')')
print()
print()
save(chains,'chains')
toc()
print('Maximum distance from other competitions')
maxdistances = [max(d) for d in degrees]
for i in range(1,6):
count = sum([1 for d in maxdistances if d == i])
print ('Distance: {:.0f}\t Number of competitions: {:.0f}'.format(i, count))
toc()
print('Average distance from other competitions')
averagedistances = [sum(d)/len(d) for d in degrees]
averagedistances = [(i, averagedistances[i]) for i in range(len(averagedistances))]
averagedistances.sort(key=lambda x: x[1])
for i in range(20):
x, dist = averagedistances[i]
print(i+1,'.\t', compsList[x],' '*(30-len(compsList[x])), 'Average:', round(dist,3) ,' \tMax:',maxdistances[x])
print('...')
for i in range(len(averagedistances)-20, len(averagedistances)):
x, dist = averagedistances[i]
print(i+1,'.\t', compsList[x],' '*(30-len(compsList[x])), 'Average:', round(dist,3) ,' \tMax:',maxdistances[x])
toc()
#!/usr/bin/python3
import json
import time
import math
def tic():
tic.start = time.time()
def toc():
print('Time elapsed:', round(time.time() - tic.start, 3),'s\n')
return (time.time() - tic.start)
def tocx():
return (time.time() - tic.start)
def save(var, varName):
with open(varName+'.json','w') as saveFile:
json.dump(var, saveFile)
def load(varName):
with open(varName+'.json') as loadFile:
result = json.load(loadFile)
return result
tic()
print('Loading Distance Matrix')
compsList = load('compsList')
degrees = load('degrees')
compsIndex = {compsList[i]: i for i in range(len(compsList))}
nCompConnections = [sum([1 for d in dists if d == 1]) for dists in degrees]
toc()
print('Loading competition attendance data')
compsAttended = load('compsAttended')
compReg = load('compReg')
toc()
print('Assigning competitions to competitors')
compsPerPerson = []
people = []
for p in sorted(compsAttended.keys()):
people.append(p)
compsPerPerson.append([])
for comp in compsAttended[p]:
compsPerPerson[-1].append(compsIndex[comp])
print('Number of people:', len(people))
toc()
print('Assigning competitors to competitions')
peoplePerComp = []
peopleIndex = {people[i]: i for i in range(len(people))}
for c in compsList:
peoplePerComp.append([])
for p in compReg[c]:
peoplePerComp[-1].append(peopleIndex[p])
toc()
print('Grouping people by identical comp attendance')
i = 0
peopleGroupedDict = {}
compsPerPeopleGroup = {}
for p in range(len(people)):
compsid = str(sorted(compsPerPerson[p]))
if not compsid in peopleGroupedDict.keys():
peopleGroupedDict[compsid] = []
compsPerPeopleGroup[compsid] = sorted(compsPerPerson[p], key = lambda c: -nCompConnections[c])
peopleGroupedDict[compsid].append(people[p])
compCombos = sorted(peopleGroupedDict.keys())
nCombos = len(compCombos)
print(nCombos, 'unique comp combinations')
save(compCombos, 'compCombos')
save(peopleGroupedDict, 'peopleGroupedDict')
save(compsPerPeopleGroup, 'compsPerPeopleGroup')
peopleGroups = []
compsLookup = []
for pg in compCombos:
peopleGroups.append(peopleGroupedDict[pg])
compsLookup.append(compsPerPeopleGroup[pg])
toc()
print('Average distance per competitor')
avgDistByPeople = []
def timef(s):
s = round(s)
return '{:.0f}:{:02.0f}:{:02.0f}'.format(math.floor(s/3600), math.floor(s/60)%60, s%60)
counter = 0
total = len(peopleGroups)*len(peopleGroups)
ttotal = 6000
peopleDistanceCounts = []
try:
peopleDistanceCounts = load('peopleDistanceCounts')
avgDistByPeople = load('avgDistByPeople')
except FileNotFoundError:
for pgrp1 in range(len(peopleGroups)):
theseDists = [0 for _ in range(6)]
for pgrp2 in range(len(peopleGroups)):
if pgrp1 == pgrp2:
theseDists[0] += len(peopleGroups[pgrp2])
continue
key = (min(pgrp1, pgrp2), max(pgrp1, pgrp2))
dist = 5
for c1 in compsLookup[pgrp1]:
if c1 in compsLookup[pgrp2]:
dist = 0
break
else:
for c1 in compsLookup[pgrp1]:
for c2 in compsLookup[pgrp2]:
if degrees[c1][c2] < dist:
dist = degrees[c1][c2]
if dist == 1:
break
if dist == 1:
break
theseDists[dist] += len(peopleGroups[pgrp2])
counter += 1
if not counter%1000000:
ttotal = tpassed/fractionComplete
if not counter%200000:
fractionComplete = counter/total
tpassed = tocx()
tremain = ttotal - tpassed
print(round(fractionComplete*100,3), '%\t',
timef(tpassed), 'passed', timef(ttotal), 'total', timef(tremain), 'remaining. ',
'Rng', (round(min(avgDistByPeople),3), round(max(avgDistByPeople),3)))
peopleDistanceCounts.append(theseDists)
avgDistByPeople.append(sum([i*theseDists[i] for i in range(6)])/sum(theseDists))
save(peopleDistanceCounts, 'peopleDistanceCounts')
save(avgDistByPeople, 'avgDistByPeople')
print('Range:', (round(min(avgDistByPeople),3), round(max(avgDistByPeople),3)))
toc()
print('Expanding counts to individual competitors')
distByPeople = {}
for pg in range(len(peopleGroups)):
for p in peopleGroups[pg]:
distByPeople[p] = peopleDistanceCounts[pg]
toc()
print('Competitors sorted by maximum distance')
maxDists = [sum([1 for d in distByPeople[p] if d > 0])-1 for p in distByPeople.keys()]
for i in range(1,6):
count = sum([1 for d in maxDists if d == i])
print ('Distance: {:.0f}\t Number of people: {:.0f}'.format(i, count))
toc()
print('Competitors sorted by average distance')
sortedPeople = sorted(distByPeople.keys(), key = lambda p: sum([i*distByPeople[p][i] for i in range(6)])/sum(distByPeople[p]))
for p in range(50):
avgDist = sum([i*distByPeople[sortedPeople[p]][i] for i in range(6)])/sum(distByPeople[sortedPeople[p]])
print(p+1, '.\t', sortedPeople[p], ' Average: ', round(avgDist, 3), '\tDistribution: ', distByPeople[sortedPeople[p]], sep='')
print('...')
for p in range(len(sortedPeople)-20, len(sortedPeople)):
avgDist = sum([i*distByPeople[sortedPeople[p]][i] for i in range(6)])/sum(distByPeople[sortedPeople[p]])
print(p+1, '.\t', sortedPeople[p], ' Average: ', round(avgDist, 3), '\tDistribution: ', distByPeople[sortedPeople[p]], sep='')
toc()
print("Competitors sorted by number of people they've been to competitions with")
sortedPeople = sorted(distByPeople.keys(), key = lambda p: -distByPeople[p][0])
for p in range(50):
avgDist = sum([i*distByPeople[sortedPeople[p]][i] for i in range(6)])/sum(distByPeople[sortedPeople[p]])
print(p+1, '.\t', sortedPeople[p], ' Average: ', round(avgDist, 3), '\tDistribution: ', distByPeople[sortedPeople[p]], sep='')
print('...')
for p in range(len(sortedPeople)-20, len(sortedPeople)):
avgDist = sum([i*distByPeople[sortedPeople[p]][i] for i in range(6)])/sum(distByPeople[sortedPeople[p]])
print(p+1, '.\t', sortedPeople[p], ' Average: ', round(avgDist, 3), '\tDistribution: ', distByPeople[sortedPeople[p]], sep='')
toc()
#!/usr/bin/python3
import json
import time
import mysql.connector
def tic():
tic.start = time.time()
def toc():
print('Time elapsed:', round(time.time() - tic.start, 3),'s\n')
return (time.time() - tic.start)
def save(var, varName):
with open(varName+'.json','w') as saveFile:
json.dump(var, saveFile)
def load(varName):
with open(varName+'.json') as loadFile:
result = json.load(loadFile)
return result
tic()
conn = mysql.connector.connect(host = "localhost",user="root",passwd="root", db="mysql")
cursor = conn.cursor()
# print('Creating Competitor Table...')
# cursor.execute('DROP TABLE Competitors;')
# cursor.execute('CREATE TABLE Competitors AS SELECT DISTINCT PersonId, CompetitionId FROM Results;')
# toc()
print('Finding competitions...')
cursor.execute('SELECT DISTINCT CompetitionId FROM Competitors;')
compsList = [c[0] for c in cursor.fetchall()]
save(compsList, 'compsList')
toc()
print('Fetching Competitor Attendance...')
counter = 0
try:
compRegCount = load('compRegCount')
compReg = load('compReg')
except FileNotFoundError:
compRegCount = {}
compReg = {}
for comp in compsList:
if comp not in compReg.keys():
cursor.execute('SELECT PersonId FROM Competitors WHERE CompetitionId=%s;', (comp,))
compReg[comp] = [p[0] for p in cursor.fetchall()]
compRegCount[comp] = len(compReg[comp])
counter += 1
if not counter%1000:
print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList), counter/len(compsList)*100))
toc()
print('Saving...')
save(compReg, 'compReg')
save(compRegCount, 'compRegCount')
toc()
compsAttended = {}
counter = 0
print('Fetching Competitions per Person...')
for comp in compsList:
for person in compReg[comp]:
if not person in compsAttended.keys():
compsAttended[person] = []
compsAttended[person].append(comp)
counter += 1
if not counter%1000:
print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList), counter/len(compsList)*100))
save(compsAttended, 'compsAttended')
toc()
def isFirstConnectionByPerson(comp1, comp2):
for person in compReg[comp1]:
if person in compReg[comp2]:
return True
return False
def isFirstConnection(n1, n2):
return n2 in connections[n1]
print('Finding Connections...')
try:
connections = load('connections')
except FileNotFoundError:
connections = [[] for _ in compsList]
counter = 1
for i in range(len(compsList)-1):
for j in range(i+1, len(compsList)):
if isFirstConnection(compsList[i], compsList[j]):
connections[i].append(j)
connections[j].append(i)
counter += 1
if not counter%50000:
print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*len(compsList)/2, counter/(len(compsList)*len(compsList)/2)*100))
save(connections, 'connections')
nConnections = [len(c) for c in connections]
toc()
# print('Checking first degree connectedness...')
# try:
# firstdegrees = load('firstdegrees')
# except FileNotFoundError:
# connections = load('connections')
# counter = 0
# firstdegrees = [[0 for _ in range(len(compsList))] for _ in range(len(compsList))]
# for i in range(len(compsList)-1):
# for j in range(i+1, len(compsList)):
# if isFirstConnection(i, j):
# firstdegrees[i][j] = 1
# firstdegrees[j][i] = 1
# counter += 1
# if not counter%200000:
# print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*(len(compsList)-1)/2, counter/(len(compsList)*(len(compsList)-1)/2)*100))
# save(firstdegrees, 'firstdegrees')
# toc()
#
#
# print('{:.2f} % first degree connections.'.format(sum([sum(d) for d in firstdegrees])/(len(compsList)*(len(compsList)-1))*100))
#
#
# print('Checking full connectedness...')
# counter = 0
# degrees = [[1 if firstdegrees[i][j] else 0 for i in range(len(compsList))] for j in range(len(compsList))]
#
#
# def connectionDegree(n1, n2):
# degree = 0
# buffer = [n1]
# connectionFound = False
# while not connectionFound:
# nextBuffer = []
# for b in buffer:
# if degrees[b][n2] > 0:
# return degree + degrees[b][n2]
# else:
# nextBuffer += connections[b]
# degree += 1
# buffer = list(set(nextBuffer))
# if degree > 4:
# raise Exception('Degree greater than max allowed: ({}, {})'.format(n1,n2))
#
#
# try:
# degrees = load('degrees')
# except FileNotFoundError:
# for i in range(len(compsList)-1):
# for j in range(i+1, len(compsList)):
# thisDegree = connectionDegree(i,j)
# degrees[i][j] = thisDegree
# degrees[j][i] = thisDegree
# counter += 1
# if not counter%50000:
# print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*(len(compsList)-1)/2, counter/(len(compsList)*(len(compsList)-1)/2)*100))
# toc()
# save(degrees, 'degrees')
print('Filling distance table...')
distances = [[0 if i==j else -1 for i in range(len(compsList))] for j in range(len(compsList))]
def getForwardBuffer(level):
return [(i,j) for i in range(len(compsList)) for j in range(i,len(compsList)) if distances[i][j] == level]
def getBackwardBuffer():
return [(i,j) for i in range(len(compsList)) for j in range(i,len(compsList)) if distances[i][j] == -1]
filled = 0
level = 0
total = len(compsList)*len(compsList)
while level<10:
if filled < total*0.33:
buffer = getForwardBuffer(level)
print('Level: {:.0f}, buffer length: {:.0f}. Forward search'.format(level, len(buffer)))
counter = 0
toc()
for i,j in buffer:
for k in connections[i]:
if distances[j][k] == -1:
distances[j][k] = level+1
distances[k][j] = level+1
filled += 1
for k in connections[j]:
if distances[i][k] == -1:
distances[i][k] = level+1
distances[k][i] = level+1
filled += 1
counter += 1
if not counter%20000:
print('filled: {:.0f}/{:.0f}\tlevel: {:.0f}\t({:.2f} % total, {:.2f} % of level)'.format(filled, total, level, filled/total*100, counter/len(buffer)*100))
else:
buffer = getBackwardBuffer()
print('Level: {:.0f}, buffer length: {:.0f}. Backward search'.format(level, len(buffer)))
counter = 0
for i,j in buffer:
for k in connections[i]:
if distances[j][k] == level and distances[i][j] == -1:
distances[i][j] = level+1
distances[j][i] = level+1
filled += 1
for k in connections[j]:
if distances[i][k] == level and distances[i][j] == -1:
distances[i][j] = level+1
distances[j][i] = level+1
filled += 1
counter += 1
if not counter%20000:
print('filled: {:.0f}/{:.0f}\tlevel: {:.0f}\t({:.2f} % total, {:.2f} % of level)'.format(filled, total, level, filled/total*100, counter/len(buffer)*100))
level += 1
toc()
degrees = distances
save(degrees, 'degrees')
for x in range(15):
compCount = 0
for i in range(len(degrees)):
for j in range(len(degrees[i])):
if distances[i][j] == x:
compCount += 1
print('By competition: Distance: {:3.0f}, Count: {:8.0f}, ({:6.2f} %)'.format(x, compCount, compCount/(len(compsList)*(len(compsList)-1))*100))
print('Maximum degree of separation: {}'.format(max([max(d) for d in degrees])))
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment