Skip to content

Instantly share code, notes, and snippets.

@johnconroy
Created November 13, 2010 13:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnconroy/675346 to your computer and use it in GitHub Desktop.
Save johnconroy/675346 to your computer and use it in GitHub Desktop.
Apply PageRank link authority algorithm to a list of Twitter users, as a first-pass measure for user authority
#pagerank calculation for a list of Twitter users
#implementing this was a bit of a head-scratcher...
f1="C:\\somedir\\list_of_edges_between_usrs.txt" #our social graph, as an edge-list
file1=open(f1,'r')
edges=file1.readlines()
l_nodes,l_outd,nodes2=[],[],[]
l_score, l_score_new, l_out_score=[],[],[]
#find all unique nodes
print 'getting unique nodes and outdegree of each...'
for x in range(len(edges)):
tab=edges[x].find('\t')
node=edges[x][:tab]
node2=edges[x][tab+1:-1]
nodes2.append(node2)
if node in l_nodes:
index=l_nodes.index(node)
l_outd[index]+=1
else:
l_nodes.append(node)
l_outd.append(1)
nodes3=[]
for node2 in nodes2:
if node2 in nodes3:
pass
else:
nodes3.append(node2)
for x in range(len(nodes3)):
if nodes3[x] not in l_nodes:
l_nodes.append(nodes3[x])
l_outd.append(1)
print 'unique nodes, outdegree done'
#create a parallel list of pagerank score. instantiate each score to 1
print 'instantiating pagerank scores @ 1...'
for x in range(len(l_nodes)):
l_score.append(1.0)
for x in range(len(l_nodes)):
l_score_new.append(1.0)
print 'getting out-scores...'
for x in range(len(l_score)):
l_out_score.append(l_score[x]/l_outd[x])
#get new pagerank scores
#iterate 49 times ... scores converge somewhere between 30-50 iterations (save every iteration to file to make sure they HAVE converged)
for n in range(49):
print 'Interation '+str(n)
fw="C:\\pagerank\\iter_"+str(n+1)+".txt"
filew=open(fw,'w')
#swap new list into old list, reset new list with 0s
for a in range(len(l_score)):
l_score[a]=l_score_new[a]
for b in range(len(l_score_new)):
l_score_new[b]=0.0
#get new outbound scores
for c in range(len(l_out_score)):
l_out_score[c]=l_score[c]/l_outd[c]
#now get new scores based on new outbound scores
for x in range(len(edges)):
tab=edges[x].find('\t')
fllr=edges[x][:tab]
flld=edges[x][tab+1:-1]
fllr_index=l_nodes.index(fllr)
fllr_score=l_out_score[fllr_index]
flld_index=l_nodes.index(flld)
l_score_new[flld_index]+=fllr_score
#apply damping factor of d=.85
d=0.85
for m in range(len(l_score_new)):
l_score_new[m]=(1.0-d)+d*l_score_new[m]
for x in range(len(l_nodes)):
filew.write(str(l_nodes[x])+","+str(l_outd[x])+","+str(l_out_score[x])+','+str(l_score[x])+','+str(l_score_new[x])+'\n')
filew.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment