afrendeiro/pairwiseAlign.py

## pairwiseAlign.py
#! /usr/bin/python

# Forward-recursion with pruning
# An algorithm for doing exact alignment of two sequences
# Forward recursion is used, with pruning of cells

# two sequences to align
S1 = 'APPLR'
S2 = 'APPLS'

# the DP matrix as a dict, prepopulated  with None
H = {}
for l in range(0, len(S1) + 1):
	for i in range(0, len(S2) + 1):
		H[(l,i)] = None

# the end cell
Hn = (len(S1),len(S2))

# K, lower bound score
K = 737
# Queue
Q = []

def upperBound(v):
	"""
	finds an upper bound of the score of
	the alignment from a cell v to the end-cell hN
	"""
	return 737 - H[v]
	#raise NotImplementedError("Upper score bound not yet implemented")

def getForwardNeighbours(v):
	"""
	finds all forward neighbour cells to current cell.
	Requires a tuple of coordinates (v).
	Returns list of tuples of neighbours in right order.
	"""
	if v[0] >= 1 and v[1] >= 1 and v[0] < len(S1) and  v[1] < len(S2):
		return [(v[0], v[1] + 1), (v[0] + 1, v[1]), (v[0] + 1, v[1] + 1)]
	else:
		return []

def getScore(v, w):
	"""
	Calculates the score of going from one cell (v) to another (w).
	Requires a tuple of coordinates (v).
	Returns list of tuples of neighbours in right order.
	"""
	if v[0] == w[0] or v[1] == w[1]:
		return H[v] - 1
	else:
		return H[v] + 1

# Add sequences to the DP matrix
for l in range(1, len(S1) + 1):
	H[(l,0)] = S1[l - 1]

for l in range(1, len(S2) + 1):
	H[(0,l)] = S2[l - 1]

# Set current cell, "v" to start cell
v = (1,1)
# Best score of an alignment from H0 to u
Pu = 0
H[v] = Pu
# Push current (start) cell to the queue
Q.append(v)

# while queue has elements do
while Q != []:
	# remove v from queue
	v = Q.pop(0)
	# set best score so far to the current cell's score
	Su = Pu
	if H[v] + upperBound(v) >= K:
		for neighbour in getForwardNeighbours(v):
			if neighbour not in Q:
				Q.append(neighbour)
				# calculate score and assign as best
				Pu = Su + getScore(v, neighbour)
				# store the score of the w cell in the matrix
				H[neighbour] = Pu
			else:
				# pick maximum score and assign as best
				Pu = max(Pu, Su + getScore(v, neighbour))
				# store the score of the w cell in the matrix
				H[neighbour] = Pu

print(Su)
	#! /usr/bin/python

	# Forward-recursion with pruning
	# An algorithm for doing exact alignment of two sequences
	# Forward recursion is used, with pruning of cells

	# two sequences to align
	S1 = 'APPLR'
	S2 = 'APPLS'

	# the DP matrix as a dict, prepopulated with None
	H = {}
	for l in range(0, len(S1) + 1):
	for i in range(0, len(S2) + 1):
	H[(l,i)] = None

	# the end cell
	Hn = (len(S1),len(S2))

	# K, lower bound score
	K = 737
	# Queue
	Q = []

	def upperBound(v):
	"""
	finds an upper bound of the score of
	the alignment from a cell v to the end-cell hN
	"""
	return 737 - H[v]
	#raise NotImplementedError("Upper score bound not yet implemented")

	def getForwardNeighbours(v):
	"""
	finds all forward neighbour cells to current cell.
	Requires a tuple of coordinates (v).
	Returns list of tuples of neighbours in right order.
	"""
	if v[0] >= 1 and v[1] >= 1 and v[0] < len(S1) and v[1] < len(S2):
	return [(v[0], v[1] + 1), (v[0] + 1, v[1]), (v[0] + 1, v[1] + 1)]
	else:
	return []

	def getScore(v, w):
	"""
	Calculates the score of going from one cell (v) to another (w).
	Requires a tuple of coordinates (v).
	Returns list of tuples of neighbours in right order.
	"""
	if v[0] == w[0] or v[1] == w[1]:
	return H[v] - 1
	else:
	return H[v] + 1

	# Add sequences to the DP matrix
	for l in range(1, len(S1) + 1):
	H[(l,0)] = S1[l - 1]

	for l in range(1, len(S2) + 1):
	H[(0,l)] = S2[l - 1]

	# Set current cell, "v" to start cell
	v = (1,1)
	# Best score of an alignment from H0 to u
	Pu = 0
	H[v] = Pu
	# Push current (start) cell to the queue
	Q.append(v)

	# while queue has elements do
	while Q != []:
	# remove v from queue
	v = Q.pop(0)
	# set best score so far to the current cell's score
	Su = Pu
	if H[v] + upperBound(v) >= K:
	for neighbour in getForwardNeighbours(v):
	if neighbour not in Q:
	Q.append(neighbour)
	# calculate score and assign as best
	Pu = Su + getScore(v, neighbour)
	# store the score of the w cell in the matrix
	H[neighbour] = Pu
	else:
	# pick maximum score and assign as best
	Pu = max(Pu, Su + getScore(v, neighbour))
	# store the score of the w cell in the matrix
	H[neighbour] = Pu

	print(Su)