quad/psplit.py

## psplit.py
#
# At YOW!2010, Guy Steele gave a great talk entitled "How to Think about
# Parallel Programming---Not!"
#
# In it, he presented an implementation of a divide and conquer strategy for
# splitting a string.
#
# His solution was written in Fortress.
#
# People nearby me were confused.
#
# So, I sat down and wrote another implementation in a more common language:
#
#   Python.
#
# Hopefully, this pack of ugly drives home the point of how our current
# languages don't pack the idiomatic punch for parallelism.
#
# -- Scott Robinson <sr@thoughtworks.com>
#

import string

from Queue import Queue
from collections import namedtuple
from contextlib import contextmanager
from threading import Thread


@contextmanager
def spawn(func):
  q = Queue()

  t = Thread(target = func, args=(q, ))
  t.start()

  try:
    yield q
  finally:
    t.join()


class Chunk(str):
  def __add__(left, right):
    if isinstance(right, Chunk):
      return Chunk(str.__add__(left, right))
    elif isinstance(right, Segment):
      return Segment(str.__add__(left, right.head), right.words, right.tail)

  def flatten(self):
    return filter(None, [str(self)])


class Segment(namedtuple('Segment', 'head words tail')):
  def __add__(left, right):
    if isinstance(right, Chunk):
      return Segment(left.head, left.words, left.tail + right)
    elif isinstance(right, Segment):
      return Segment(left.head, left.words + filter(None, [left.tail + right.head]) + right.words, right.tail)

  def flatten(self):
    return filter(None, [self.head]) + self.words + filter(None, [self.tail])


def psplit(s):
  def _(l, r):
    length = r - l

    if length == 0:
      return Chunk('')
    elif length == 1:
      if s[l] in string.whitespace:
        return Segment('', [], '')
      else:
        return Chunk(s[l])
    else:
      pivot = l + (length / 2)

      with spawn(lambda q: q.put(_(l, pivot))) as q_a:
        with spawn(lambda q: q.put(_(pivot, r))) as q_b:
          a, b = q_a.get(), q_b.get()

          return a + b

  return _(0, len(s)).flatten()


def main():
  assert psplit('this is a xfklsjdflkdjsflkdsjf string') == ['this', 'is', 'a', 'xfklsjdflkdjsflkdsjf', 'string']
  assert psplit('   two  words  ') == ['two', 'words']
  assert psplit(' ') == []
  assert psplit('') == []


if __name__ == '__main__':
  main()
	#
	# At YOW!2010, Guy Steele gave a great talk entitled "How to Think about
	# Parallel Programming---Not!"
	#
	# In it, he presented an implementation of a divide and conquer strategy for
	# splitting a string.
	#
	# His solution was written in Fortress.
	#
	# People nearby me were confused.
	#
	# So, I sat down and wrote another implementation in a more common language:
	#
	# Python.
	#
	# Hopefully, this pack of ugly drives home the point of how our current
	# languages don't pack the idiomatic punch for parallelism.
	#
	# -- Scott Robinson <sr@thoughtworks.com>
	#

	import string

	from Queue import Queue
	from collections import namedtuple
	from contextlib import contextmanager
	from threading import Thread


	@contextmanager
	def spawn(func):
	q = Queue()

	t = Thread(target = func, args=(q, ))
	t.start()

	try:
	yield q
	finally:
	t.join()


	class Chunk(str):
	def __add__(left, right):
	if isinstance(right, Chunk):
	return Chunk(str.__add__(left, right))
	elif isinstance(right, Segment):
	return Segment(str.__add__(left, right.head), right.words, right.tail)

	def flatten(self):
	return filter(None, [str(self)])


	class Segment(namedtuple('Segment', 'head words tail')):
	def __add__(left, right):
	if isinstance(right, Chunk):
	return Segment(left.head, left.words, left.tail + right)
	elif isinstance(right, Segment):
	return Segment(left.head, left.words + filter(None, [left.tail + right.head]) + right.words, right.tail)

	def flatten(self):
	return filter(None, [self.head]) + self.words + filter(None, [self.tail])


	def psplit(s):
	def _(l, r):
	length = r - l

	if length == 0:
	return Chunk('')
	elif length == 1:
	if s[l] in string.whitespace:
	return Segment('', [], '')
	else:
	return Chunk(s[l])
	else:
	pivot = l + (length / 2)

	with spawn(lambda q: q.put(_(l, pivot))) as q_a:
	with spawn(lambda q: q.put(_(pivot, r))) as q_b:
	a, b = q_a.get(), q_b.get()

	return a + b

	return _(0, len(s)).flatten()


	def main():
	assert psplit('this is a xfklsjdflkdjsflkdsjf string') == ['this', 'is', 'a', 'xfklsjdflkdjsflkdsjf', 'string']
	assert psplit(' two words ') == ['two', 'words']
	assert psplit(' ') == []
	assert psplit('') == []


	if __name__ == '__main__':
	main()