# enewe101/pod_example.py

## pod_example.py
import random
import shutil
from t4k import (
	PersistentOrderedDict as POD, ProgressTracker as PT,
	DuplicateKeyException
)


# The POD is a general-purpose data store that syncs to disk
# The PT is a subclass that's a bit more convenient for tracking
# long-running processes.


#
#		Here's a typical example
#

# Let me just define a couple things first to use in the example...
# Threshold that the example loop compares a city's '_tries' count against
# to decide when to stop retrying that city.
MAX_TRIES = 3
# Keys used by the example: each city name becomes one tracker entry.
CITIES = [
	'Montreal',
	'Toronto',
	'Vancouver',
	'Ottawa',
	'New York',
	'London',
	'Tokyo',
]
class CityError(Exception):
	"""Simulated failure raised at random while processing a city."""
def process_city(city):
	"""Simulate processing a city through an unreliable resource.

	Roughly 30% of calls fail at random; the remaining calls print a short
	message about the city and return it.

	Args:
		city: Name of the city to process.

	Returns:
		The message string that was printed for the city.

	Raises:
		CityError: When the simulated random failure occurs.
	"""
	# random.random() is uniform on [0, 1), so this fires ~30% of the time.
	if random.random() < 0.3:
		# Single-argument print(...) behaves identically on Python 2 and 3,
		# whereas the bare print statement is a SyntaxError on Python 3.
		print('CityError! Try again later!')
		raise CityError()
	if city in {'Montreal', 'Toronto', 'Ottawa'}:
		msg = 'Edward lived there once.'
	elif city == 'Tokyo':
		msg = 'Tokyo is a cool city!'
	else:
		msg = "%s is a crap city!" % city
	print(msg)
	return msg


# OK, here's the example.  To simulate dealing with an unreliable resource,
# the call to process_city will randomly throw an error.
# You'll probably need to run the example a few times before it successfully
# processes all the cities.  Try it out!
tracker = PT('city-tracker')
for city in CITIES:

	# check_or_add() is truthy only for entries already marked done; for a
	# city it has not seen before it also creates the entry.
	if tracker.check_or_add(city):
		print('%s is already done, skiping!' % city)
		continue

	# Give up once the retry budget is spent.  The count is checked *before*
	# it is incremented, so the comparison has to be `>=` for MAX_TRIES to be
	# the real number of attempts (`>` would allow MAX_TRIES + 1 attempts).
	if tracker[city]['_tries'] >= MAX_TRIES:
		continue
	tracker.increment_tries(city)

	try:
		result = process_city(city)
	except CityError:
		# Leave the entry un-done so that a later run can retry it.
		continue

	# Success: store the outcome on the entry, then flag the entry as done.
	tracker.set(city, 'result', result)
	tracker.mark_done(city)


######## 	More details below!


#
# 		Store arbitrary data: (key, subkey, value)
#

# When making a PT or POD, give a path -- a directory will be made there
# that will store the POD files.
pod = PT('example-pod')

# Creates a new entry for something you want to track.
# For example, maybe you want to track which urls have been crawled,
# which files have been written, etc.
pod.add('some-key')

# Trying to add the same key twice is an error!
try:
	pod.add('some-key')
except DuplicateKeyException:
	# Single-argument print(...) runs the same on Python 2 and Python 3.
	print('Trying to add the same key twice is an error!')

# You can set arbitrary data on the entry.
# By default, this will be synced to disk immediately.
pod.set('some-key', 'sub-key', 42)
pod['some-key']['sub-key'] # this will return 42.


#
#		Keep track of progress
#

# NOTE: the calls below are bare expression statements, so when this file is
# run as a script their return values are simply discarded; the comments say
# what each call would return.

# Check if an entry is "done".  It will return False this time
pod.check('some-key')

# Mark an entry done
pod.mark_done('some-key')

# Now this returns True...
pod.check('some-key')

# Checking for a non-existent key is fine.  It returns False
pod.check('non-existent-key')

# A common pattern is to check if a key is done, and also, if it doesn't
# exist, to add it.  This will return False if the entry is not done or
# doesn't exist, and if it doesn't exist the key will be added.
pod.check_or_add('some-key')

# Keep track of the number of times you try to process an entry
pod.increment_tries('some-key')

# The entry's number of tries is in a subkey '_tries'.
# This would return 1 right now, because we called increment_tries above
pod['some-key']['_tries']

# Reset tries back to zero
pod.reset_tries('some-key')

# Now this returns 0
pod['some-key']['_tries']


#
#		Control syncing to disk
#

# If you're making many changes, then syncing them to disk becomes
# too much of a performance hit.  Call hold, and any changes you make
# will be held in memory, but won't be synced to disk
pod.hold() # After calling pod.hold, any changes are reflected in memory

# Then, calling unhold() later will cause all of those changes to be synced
pod.unhold()

# A common pattern is:
#	1. call hold()
#	2. make several updates
#	3. call unhold()

# You can manually mark some entry as needing to be synced.  Usually
# you don't need this unless you're doing some hacky stuff.  Calling this
# will either cause the entry to be synced immediately, or if hold() has
# been called, the entry will be synced as soon as unhold() is called
pod.update('some-key')

# Remove the example-pod so that the example can be run multiple times
# without raising unintended DuplicateKey exceptions
shutil.rmtree('example-pod')
	import random
	import shutil
	from t4k import (
	PersistentOrderedDict as POD, ProgressTracker as PT,
	DuplicateKeyException
	)


	# The POD is a general-purpose data store that syncs to disk
	# The PT is a subclass that's a bit more convenient for tracking
	# long-running processes.


	#
	# Here's a typical example
	#

	# Let me just define a couple things first to use in the example...
	# Threshold that the example loop compares a city's '_tries' count against
	# to decide when to stop retrying that city.
	MAX_TRIES = 3
	# Keys used by the example: each city name becomes one tracker entry.
	CITIES = [
		'Montreal',
		'Toronto',
		'Vancouver',
		'Ottawa',
		'New York',
		'London',
		'Tokyo',
	]
	class CityError(Exception):
		"""Simulated failure raised at random while processing a city."""
		# The class body must be indented below the `class` line; with the
		# body at the same column the definition is an IndentationError.
		pass
	def process_city(city):
		"""Simulate processing a city through an unreliable resource.

		Roughly 30% of calls fail at random; the remaining calls print a
		short message about the city and return it.

		Args:
			city: Name of the city to process.

		Returns:
			The message string that was printed for the city.

		Raises:
			CityError: When the simulated random failure occurs.
		"""
		# random.random() is uniform on [0, 1), so this fires ~30% of the time.
		if random.random() < 0.3:
			# Single-argument print(...) behaves identically on Python 2 and
			# 3, whereas the bare print statement fails on Python 3.
			print('CityError! Try again later!')
			raise CityError()
		if city in {'Montreal', 'Toronto', 'Ottawa'}:
			msg = 'Edward lived there once.'
		elif city == 'Tokyo':
			msg = 'Tokyo is a cool city!'
		else:
			msg = "%s is a crap city!" % city
		print(msg)
		return msg


	# OK, here's the example. To simulate dealing with an unreliable resource,
	# the call to process_city will randomly throw an error.
	# You'll probably need to run the example a few times before it successfully
	# processes all the cities. Try it out!
	tracker = PT('city-tracker')
	for city in CITIES:

		# check_or_add() is truthy only for entries already marked done; for
		# a city it has not seen before it also creates the entry.
		if tracker.check_or_add(city):
			print('%s is already done, skiping!' % city)
			continue

		# Give up once the retry budget is spent.  The count is checked
		# *before* it is incremented, so the comparison has to be `>=` for
		# MAX_TRIES to be the real number of attempts (`>` would allow
		# MAX_TRIES + 1 attempts).
		if tracker[city]['_tries'] >= MAX_TRIES:
			continue
		tracker.increment_tries(city)

		try:
			result = process_city(city)
		except CityError:
			# Leave the entry un-done so that a later run can retry it.
			continue

		# Success: store the outcome on the entry, then flag it as done.
		tracker.set(city, 'result', result)
		tracker.mark_done(city)


	######## More details below!


	#
	# Store arbitrary data: (key, subkey, value)
	#

	# When making a PT or POD, give a path -- a directory will be made there
	# that will store the POD files.
	pod = PT('example-pod')

	# Creates a new entry for something you want to track.
	# For example, maybe you want to track which urls have been crawled,
	# which files have been written, etc.
	pod.add('some-key')

	# Trying to add the same key twice is an error!
	try:
		pod.add('some-key')
	except DuplicateKeyException:
		# Single-argument print(...) runs the same on Python 2 and Python 3.
		print('Trying to add the same key twice is an error!')

	# You can set arbitrary data on the entry.
	# By default, this will be synced to disk immediately.
	pod.set('some-key', 'sub-key', 42)
	pod['some-key']['sub-key'] # this will return 42.


	#
	# Keep track of progress
	#

	# NOTE: the calls below are bare expression statements, so when this is
	# run as a script their return values are simply discarded; the comments
	# say what each call would return.

	# Check if an entry is "done". It will return False this time
	pod.check('some-key')

	# Mark an entry done
	pod.mark_done('some-key')

	# Now this returns True...
	pod.check('some-key')

	# Checking for a non-existent key is fine. It returns False
	pod.check('non-existent-key')

	# A common pattern is to check if a key is done, and also, if it doesn't
	# exist, to add it. This will return False if the entry is not done or
	# doesn't exist, and if it doesn't exist the key will be added.
	pod.check_or_add('some-key')

	# Keep track of the number of times you try to process an entry
	pod.increment_tries('some-key')

	# The entry's number of tries is in a subkey '_tries'.
	# This would return 1 right now, because we called increment_tries above
	pod['some-key']['_tries']

	# Reset tries back to zero
	pod.reset_tries('some-key')

	# Now this returns 0
	pod['some-key']['_tries']


	#
	# Control syncing to disk
	#

	# If you're making many changes, then syncing them to disk becomes
	# too much of a performance hit. Call hold, and any changes you make
	# will be held in memory, but won't be synced to disk
	pod.hold() # After calling pod.hold, any changes are reflected in memory

	# Then, calling unhold() later will cause all of those changes to be synced
	pod.unhold()

	# A common pattern is:
	# 1. call hold()
	# 2. make several updates
	# 3. call unhold()

	# You can manually mark some entry as needing to be synced. Usually
	# you don't need this unless you're doing some hacky stuff. Calling this
	# will either cause the entry to be synced immediately, or if hold() has
	# been called, the entry will be synced as soon as unhold() is called
	pod.update('some-key')

	# Remove the example-pod so that the example can be run multiple times
	# without raising unintended DuplicateKey exceptions
	shutil.rmtree('example-pod')