Skip to content

Instantly share code, notes, and snippets.

@kedarbellare
Created August 1, 2012 22:43
Show Gist options
  • Save kedarbellare/3231358 to your computer and use it in GitHub Desktop.
Save kedarbellare/3231358 to your computer and use it in GitHub Desktop.
dijkstra implementations
import heapq
import csv
# no heapq implementation
def up_heapify(L, i):
while i:
pi = (i-1)/2
if L[pi] > L[i]:
L[i], L[pi] = L[pi], L[i]
i = pi
else:
return
def down_heapify(L, i):
length = len(L)
while True:
li, ri = 2*i+1, 2*i+2
if ri >= length:
if li >= length: return
elif L[i] > L[li]:
(L[i], L[li]) = (L[li], L[i])
return
else:
si = li if L[li] < L[ri] else ri
if L[i] > L[si]:
(L[i], L[si]) = (L[si], L[i])
i = si
else: return
def extract_min(L):
L[0], L[len(L)-1] = L[len(L)-1], L[0]
minval = L.pop()
down_heapify(L, 0)
return minval
def heap_add(L, x):
L.append(x)
up_heapify(L, len(L)-1)
def get_paths(parent, v):
paths = {v: [v]}
def add_paths_from(x):
if x not in paths:
px_path = add_paths_from(parent[x])
paths[x] = px_path + [x]
return paths[x]
for x in parent:
if x not in paths:
add_paths_from(x)
return paths
def dijkstra_mod(G, v, dist_update, heap_pop, heap_push):
dist_so_far = {v: 0}
dist_heap = [(0, v)]
parent = {}
final_dist = {}
while len(dist_heap) > 0:
dw, w = heap_pop(dist_heap)
if w not in final_dist:
final_dist[w] = dw
for x in G[w]:
if x not in final_dist:
dx = dist_update(dw, G[w][x])
if x not in dist_so_far or dist_so_far[x] > dx:
dist_so_far[x] = dx
heap_push(dist_heap, (dx, x))
parent[x] = w
return final_dist, parent
def dijkstra(G, v, dist_update = lambda d1, d2: d1+d2):
return dijkstra_mod(G, v, dist_update, heapq.heappop, heapq.heappush)
def dijkstra_vanilla(G, v, dist_update = lambda d1, d2: d1+d2):
return dijkstra_mod(G, v, dist_update, extract_min, heap_add)
# D's Dijkstra implementation
def down_heapifyD(L, H, i):
llen, L = L[0], L[1]
while (True):
left = 2*i+1
right = 2*i+2
if (right >= llen):
if (left >= llen):
# leaf
return i
else:
# only one child
if L[i][0] > L[left][0]:
(L[i], L[left]) = (L[left], L[i])
H[L[i][1]] = i
H[L[left][1]] = left
return left
else:
# if i has two children...
if min(L[left][0], L[right][0]) >= L[i][0]: return i
if L[left][0] < L[right][0]:
(L[i], L[left]) = (L[left], L[i])
H[L[i][1]] = i
H[L[left][1]] = left
i = left
else:
# Swap into right child
(L[i], L[right]) = (L[right], L[i])
H[L[i][1]] = i
H[L[right][1]] = right
i = right
def up_heapifyD(L, H, i):
L = L[1]
while i:
parentIndex = (i-1)/2
if L[i][0] < L[parentIndex][0]:
L[i], L[parentIndex] = L[parentIndex], L[i]
H[L[i][1]] = i
H[L[parentIndex][1]] = parentIndex
i = parentIndex
else:
return i
return i
def heap_addD(L, H, node, value):
L[0] += 1
L[1][L[0]-1] = (value, node)
H[node] = L[0]-1
up_heapifyD(L, H, L[0]-1)
def heap_delD(L, H):
del H[L[1][0][1]]
if (L[0]>1):
L[0] -= 1
L[1][0] = L[1][L[0]]
H[L[1][0][1]] = 0
down_heapifyD(L, H, 0)
else:
L[0] -= 1
def dijkstraD(G,v):
L = [0, len(G)*[None]]
H = {}
heap_addD(L, H, v, 0)
final_dist = {}
while len(final_dist) < len(G) and L[0]:
(dist, w) = L[1][0]
final_dist[w] = dist
heap_delD(L, H)
for x in G[w]:
if x not in final_dist:
new_dist = final_dist[w] + G[w][x]
if x not in H:
heap_addD(L, H, x, new_dist)
elif new_dist < L[1][H[x]][0]:
L[1][H[x]] = (new_dist, x)
down_heapifyD(L, H, H[x])
return final_dist
############
#
# Test
def make_link(G, node1, node2, w):
if node1 not in G:
G[node1] = {}
if node2 not in G[node1]:
(G[node1])[node2] = 0
(G[node1])[node2] += w
if node2 not in G:
G[node2] = {}
if node1 not in G[node2]:
(G[node2])[node1] = 0
(G[node2])[node1] += w
return G
def make_random_graph(size):
from random import randint
G = {}
nodes = range(size)
for a in range(2*size):
a = a % size
b = (a + randint(1, size - 1)) % size
w = randint(0, 20)
make_link(G, a, b, w)
while True:
conn, unconn = connected_nodes(G, 0)
if len(unconn) == 0:
break
make_link(G, conn.pop(), unconn.pop(), randint(0, 20))
return G
def connected_nodes(G, root):
# just run a search
all_nodes = set(G.keys())
open_list = [root]
visited_nodes = set([root])
all_nodes.remove(root)
while len(open_list) > 0:
node = open_list.pop()
neighbors = G[node].keys()
for n in neighbors:
if n not in visited_nodes:
open_list.append(n)
visited_nodes.add(n)
all_nodes.remove(n)
return visited_nodes, all_nodes
def test():
import random
# shortcuts
(a,b,c,d,e,f,g) = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
triples = ((a,c,3),(c,b,10),(a,b,15),(d,b,9),(a,d,4),(d,f,7),(d,e,3),
(e,g,1),(e,f,5),(f,g,2),(b,f,1))
G = {}
for (i,j,k) in triples:
make_link(G, i, j, k)
dist, predecessor = dijkstra(G, a)
assert dist[g] == 8 #(a -> d -> e -> g)
assert dist[b] == 11 #(a -> d -> e -> g -> f -> b)
print 'test passes'
def test_speed():
import time
sizes = [100,300,600,1000,3000,6000,10000,30000]
trials = 10
for size in sizes:
avg_time = 0.0
avg_time2 = 0.0
avg_timeD = 0.0
for _ in xrange(trials):
G = make_random_graph(size)
start_time = time.time()
dist, parent = dijkstra(G, 0)
avg_time += (time.time() - start_time)
start_time = time.time()
dist, parent = dijkstra_vanilla(G, 0)
avg_time2 += (time.time() - start_time)
start_time = time.time()
dist = dijkstraD(G, 0)
avg_timeD += (time.time() - start_time)
avg_time /= trials
avg_time2 /= trials
avg_timeD /= trials
print 'time taken for %d size graph, without heapq/with heapq: %.3f, without heapq D/with heapq: %.3f' % (size, avg_time2/avg_time, avg_timeD/avg_time)
def read_marvel_graph(filename):
# Read an undirected graph in CSV format. Each line is an edge
tsv = csv.reader(open(filename), delimiter='\t')
G, characters = {}, {}
index = 0
for (char, comic) in tsv:
if char not in characters:
characters[char] = index
index += 1
make_link(G, char, comic, 1)
charG = {}
for char1 in characters:
for comic in G[char1]:
for char2 in G[comic]:
if characters[char1] < characters[char2]:
make_link(charG, char1, char2, 1)
hopcharG = {}
for char1 in charG:
for char2 in charG[char1]:
count = charG[char1][char2]
charG[char1][char2] = 1.0/count
if characters[char1] < characters[char2]:
make_link(hopcharG, char1, char2, 1)
return hopcharG, charG
def create_imdb_graph(fname, wtfname):
G = {}
actors = {}
movies = {}
movie_wts = {}
index = 0
file = csv.reader(open(wtfname, 'rb'), delimiter='\t')
for row in file:
movie = '%s (%s)' % (row[0], row[1])
movie_wts[movie] = float(row[2])
file = csv.reader(open(fname, 'rb'), delimiter='\t')
for row in file:
actor = row[0]
movie = '%s (%s)' % (row[1], row[2])
if actor not in actors:
actors[actor] = index
index += 1
if movie not in movies:
movies[movie] = index
index += 1
make_link(G, actors[actor], movies[movie], movie_wts[movie])
return G, actors, movies
test()
test_speed()
hopcharG, charG = read_marvel_graph('marvel.tsv')
my_chars = ['SPIDER-MAN/PETER PAR', 'GREEN GOBLIN/NORMAN ',
'WOLVERINE/LOGAN ', 'PROFESSOR X/CHARLES ', 'CAPTAIN AMERICA']
numpaths = 0
for char1 in my_chars:
hopdist, hopparent = dijkstra(hopcharG, char1)
dist, parent = dijkstra(charG, char1)
paths = get_paths(parent, char1)
hoppaths = get_paths(hopparent, char1)
numcharpaths = 0
for char2 in dist:
if len(hoppaths[char2]) != len(paths[char2]):
numpaths += 1
numcharpaths += 1
print 'for', char1, '#paths=', numcharpaths
print numpaths
imdbG, actors, movies = create_imdb_graph('imdb-1.tsv', 'imdb-weights.tsv')
testImdb = {(u'Ali, Tony', u'Allen, Woody'): 0.5657,
(u'Auberjonois, Rene', u'MacInnes, Angus'): 0.0814,
(u'Avery, Shondrella', u'Dorsey, Kimberly (I)'): 0.7837,
(u'Bollo, Lou', u'Jeremy, Ron'): 0.4763,
(u'Byrne, P.J.', u'Clarke, Larry'): 0.109,
(u'Couturier, Sandra-Jessica', u'Jean-Louis, Jimmy'): 0.3649,
(u'Crawford, Eve (I)', u'Cutler, Tom'): 0.2052,
(u'Flemyng, Jason', u'Newman, Laraine'): 0.139,
(u'French, Dawn', u'Smallwood, Tucker'): 0.2979,
(u'Gunton, Bob', u'Nagra, Joti'): 0.2136,
(u'Hoffman, Jake (I)', u'Shook, Carol'): 0.6073,
#(u'Kamiki, Ry\xfbnosuke', u'Thor, Cameron'): 0.3644,
(u'Roache, Linus', u'Dreyfuss, Richard'): 0.6731,
(u'Sanchez, Phillip (I)', u'Wiest, Dianne'): 0.5083,
(u'Sheppard, William Morgan', u'Crook, Mackenzie'): 0.0849,
(u'Stan, Sebastian', u'Malahide, Patrick'): 0.2857,
(u'Tessiero, Michael A.', u'Molen, Gerald R.'): 0.2056,
(u'Thomas, Ken (I)', u'Bell, Jamie (I)'): 0.3941,
(u'Thompson, Sophie (I)', u'Foley, Dave (I)'): 0.1095,
(u'Tzur, Mira', u'Heston, Charlton'): 0.3642}
answer = {(u'Boone Junior, Mark', u'Del Toro, Benicio'): None,
(u'Braine, Richard', u'Coogan, Will'): None,
(u'Byrne, Michael (I)', u'Quinn, Al (I)'): None,
(u'Cartwright, Veronica', u'Edelstein, Lisa'): None,
(u'Curry, Jon (II)', u'Wise, Ray (I)'): None,
(u'Di Benedetto, John', u'Hallgrey, Johnathan'): None,
(u'Hochendoner, Jeff', u'Cross, Kendall'): None,
(u'Izquierdo, Ty', u'Kimball, Donna'): None,
(u'Jace, Michael', u'Snell, Don'): None,
(u'James, Charity', u'Tuerpe, Paul'): None,
(u'Kay, Dominic Scott', u'Cathey, Reg E.'): None,
(u'McCabe, Richard', u'Washington, Denzel'): None,
(u'Reid, Kevin (I)', u'Affleck, Rab'): None,
(u'Reid, R.D.', u'Boston, David (IV)'): None,
(u'Restivo, Steve', u'Preston, Carrie (I)'): None,
(u'Rodriguez, Ramon (II)', u'Mulrooney, Kelsey'): None,
(u'Rooker, Michael (I)', u'Grady, Kevin (I)'): None,
(u'Ruscoe, Alan', u'Thornton, Cooper'): None,
(u'Sloan, Tina', u'Dever, James D.'): None,
(u'Wasserman, Jerry', u'Sizemore, Tom'): None}
for actor1, actor2 in testImdb:
a1, a2 = actors[actor1], actors[actor2]
dist, parent = dijkstra(imdbG, a1, dist_update = lambda d1, d2: max(d1, d2))
assert dist[a2] == testImdb[(actor1, actor2)]
for actor1, actor2 in answer:
a1, a2 = actors[actor1], actors[actor2]
dist, parent = dijkstra(imdbG, a1, dist_update = lambda d1, d2: max(d1, d2))
answer[(actor1, actor2)] = dist[a2]
print answer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment