kragen/visits.py

## visits.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Try out my Python implementation of the relational algebra on a real query.

I wrote it a few years back, and then I realized that the SQL version
of this query was much worse than the relational algebra version, so I
thought I would see if the query in my Python implementation of part
of the relational algebra was also better than the SQL version.

Here’s the original relational algebra source code:
routes_to ← σ_(start.route = end.route)(ρ_start(visits) × ρ_end(visits))
squarepairs ← ρ_a(square) × ρ_b(square)
neighbor ← σ_(a.x-1 ≤ b.x ≤ a.x+1 ∧ a.y-1 ≤ b.y ≤ a.y+1)(squarepairs)
rtn ← σ_(end.plano = a.plano ∧ end.row = a.row ∧ end.column = a.column)( \
         routes_to × neighbor)
out ← σ_(start.plano = '9' ∧ start.column = 'b' ∧ start.row = '4' ∧
         b.plano = '16' ∧ b.column = 'c' ∧ b.row = '4')(rtn)
Π_route(out)

What follows is more or less a transliteration of the above, with the
advantage that the transliteration is built on top of a system that
can translate the query into SQL.  (I had to hack up my relational
algebra implementation a bit to get it to support some new stuff and
not give me warnings in Python 2.5, so this won’t run with the 2004
version.)

"""
import relalg


def main():
    visits, square = relalg.table('visits'), relalg.table('square')
    start = visits.rename('start')
    end = visits.rename('end')
    routes_to = (start * end).select(
        start.column('route') == end.column('route'))

    neighbor = square.rename('neighbor')
    neighbors = (square * neighbor).select(
        neighbor.column('x')-1 <=   square.column('x'),
          square.column('x')   <= neighbor.column('x')+1,
        neighbor.column('y')-1 <=   square.column('y'),
          square.column('y')   <= neighbor.column('y')+1)

    route_to_neighbor = (routes_to * neighbors).select(
        end.column('plano')  == square.column('plano'),
        end.column('row')    == square.column('row'),
        end.column('column') == square.column('column'))

    query = (route_to_neighbor.select(neighbor.column('plano') == '16',
                                      neighbor.column('column') == 'c',
                                      neighbor.column('row') == '4',
                                      start.column('plano') == '9',
                                      start.column('column') == 'b',
                                      start.column('row') =='4')
                              .project(start.column('route')))

    # I pasted the query into SQLite to verify that it produced the
    # right results.  It does, except that it doesn’t include
    # `DISTINCT`, so there are duplicates.
    return query.sql()

if __name__ == '__main__':
    print main()

# Conclusion: NO, the above version is NOT better than SQL.  It’s 22
# lines, and the original SQL is only 12 lines, growing to 24 lines if
# I factor out the `routes_to` and `neighbors` views as above.  (See
# <http://gist.github.com/106231>.)

# However, it seems like a lot of the ugliness is purely syntactic:
# `.column('')` all over the place, for example.  And defining an
# inner-join operation here shouldn’t be rocket science, either.
	#!/usr/bin/python
	# -- coding: utf-8 --
	"""Try out my Python implementation of the relational algebra on a real query.

	I wrote it a few years back, and then I realized that the SQL version
	of this query was much worse than the relational algebra version, so I
	thought I would see if the query in my Python implementation of part
	of the relational algebra was also better than the SQL version.

	Here’s the original relational algebra source code:
	routes_to ← σ_(start.route = end.route)(ρ_start(visits) × ρ_end(visits))
	squarepairs ← ρ_a(square) × ρ_b(square)
	neighbor ← σ_(a.x-1 ≤ b.x ≤ a.x+1 ∧ a.y-1 ≤ b.y ≤ a.y+1)(squarepairs)
	rtn ← σ_(end.plano = a.plano ∧ end.row = a.row ∧ end.column = a.column)( \
	routes_to × neighbor)
	out ← σ_(start.plano = '9' ∧ start.column = 'b' ∧ start.row = '4' ∧
	b.plano = '16' ∧ b.column = 'c' ∧ b.row = '4')(rtn)
	Π_route(out)

	What follows is more or less a transliteration of the above, with the
	advantage that the transliteration is built on top of a system that
	can translate the query into SQL. (I had to hack up my relational
	algebra implementation a bit to get it to support some new stuff and
	not give me warnings in Python 2.5, so this won’t run with the 2004
	version.)

	"""
	import relalg


	def main():
	visits, square = relalg.table('visits'), relalg.table('square')
	start = visits.rename('start')
	end = visits.rename('end')
	routes_to = (start * end).select(
	start.column('route') == end.column('route'))

	neighbor = square.rename('neighbor')
	neighbors = (square * neighbor).select(
	neighbor.column('x')-1 <= square.column('x'),
	square.column('x') <= neighbor.column('x')+1,
	neighbor.column('y')-1 <= square.column('y'),
	square.column('y') <= neighbor.column('y')+1)

	route_to_neighbor = (routes_to * neighbors).select(
	end.column('plano') == square.column('plano'),
	end.column('row') == square.column('row'),
	end.column('column') == square.column('column'))

	query = (route_to_neighbor.select(neighbor.column('plano') == '16',
	neighbor.column('column') == 'c',
	neighbor.column('row') == '4',
	start.column('plano') == '9',
	start.column('column') == 'b',
	start.column('row') =='4')
	.project(start.column('route')))

	# I pasted the query into SQLite to verify that it produced the
	# right results. It does, except that it doesn’t include
	# `DISTINCT`, so there are duplicates.
	return query.sql()

	if __name__ == '__main__':
	print main()

	# Conclusion: NO, the above version is NOT better than SQL. It’s 22
	# lines, and the original SQL is only 12 lines, growing to 24 lines if
	# I factor out the `routes_to` and `neighbors` views as above. (See
	# <http://gist.github.com/106231>.)

	# However, it seems like a lot of the ugliness is purely syntactic:
	# `.column('')` all over the place, for example. And defining an
	# inner-join operation here shouldn’t be rocket science, either.