# Gist by @enewe101 (e1da9b9a5681f90df36354d61dad79f3), last active July 27, 2016.
# Usage examples for t4k's PersistentOrderedDict and ProgressTracker.
import random
import shutil
from t4k import (
PersistentOrderedDict as POD, ProgressTracker as PT,
DuplicateKeyException
)
# The POD is a general-purpose data store that syncs to disk
# The PT is a subclass that's a bit more convenient for tracking
# long-running processes.
#
# Here's a typical example
#
# Let me just define a couple things first to use in the example...
# Cities the example loop iterates over, in processing order.
CITIES = [
    'Montreal',
    'Toronto',
    'Vancouver',
    'Ottawa',
    'New York',
    'London',
    'Tokyo',
]

# Limit that each city's '_tries' counter is compared against.
MAX_TRIES = 3
class CityError(Exception):
    """Signals a simulated, transient failure while processing a city."""
def process_city(city):
    """Return a short remark about ``city``, failing at random.

    Simulates an unreliable resource: whenever ``random.random()`` yields a
    value below 0.3, a warning is printed and ``CityError`` is raised instead
    of producing a result.

    Args:
        city: Name of the city to remark on.

    Returns:
        The remark string, which is also printed before being returned.

    Raises:
        CityError: On a simulated failure.
    """
    if random.random() < 0.3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('CityError! Try again later!')
        raise CityError()

    if city in {'Montreal', 'Toronto', 'Ottawa'}:
        msg = 'Edward lived there once.'
    elif city == 'Tokyo':
        msg = 'Tokyo is a cool city!'
    else:
        msg = "%s is a crap city!" % city

    print(msg)
    return msg
# OK, here's the example. To simulate dealing with an unreliable resource,
# the call to process_city will randomly throw an error.
# You'll probably need to run the example a few times before it successfully
# processes all the cities. Try it out!
# Progress is stored under the 'city-tracker' path, so re-running the script
# picks up where the previous run left off.
tracker = PT('city-tracker')
for city in CITIES:
    # check_or_add returns True only for entries already marked done; a
    # missing entry is added and reported as not done.
    if tracker.check_or_add(city):
        print('%s is already done, skiping!' % city)
        continue

    # Give up once MAX_TRIES attempts have been recorded. Comparing with ">"
    # would allow MAX_TRIES + 1 attempts, because the counter starts at 0 and
    # is only incremented after this check.
    if tracker[city]['_tries'] >= MAX_TRIES:
        continue
    tracker.increment_tries(city)

    try:
        result = process_city(city)
    except CityError:
        # Leave the entry not-done so that a later run retries it.
        continue

    tracker.set(city, 'result', result)
    tracker.mark_done(city)
######## More details below!
#
# Store arbitrary data: (key, subkey, value)
#
# When making a PT or POD, give a path -- a directory will be made there
# that will store the POD files.
pod = PT('example-pod')

# Creates a new entry for something you want to track.
# For example, maybe you want to track which urls have been crawled,
# which files have been written, etc.
pod.add('some-key')

# Trying to add the same key twice is an error!
try:
    pod.add('some-key')
except DuplicateKeyException:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Trying to add the same key twice is an error!')

# You can set arbitrary data on the entry.
# By default, this will be synced to disk immediately.
pod.set('some-key', 'sub-key', 42)
pod['some-key']['sub-key']  # this will return 42.

#
# Keep track of progress
#
# Check if an entry is "done". It will return False this time.
pod.check('some-key')

# Mark an entry done.
pod.mark_done('some-key')

# Now this returns True...
pod.check('some-key')

# Checking for a non-existent key is fine. It returns False.
pod.check('non-existent-key')

# A common pattern is to check if a key is done, and also, if it doesn't
# exist, to add it. This will return False if the entry is not done or
# doesn't exist, and if it doesn't exist, the key will be added.
pod.check_or_add('some-key')

# Keep track of the number of times you try to process an entry.
pod.increment_tries('some-key')

# The entry's number of tries is in a subkey '_tries'.
# This would return 1 right now, because we called increment_tries above.
pod['some-key']['_tries']

# Reset tries back to zero.
pod.reset_tries('some-key')

# Now this returns 0.
pod['some-key']['_tries']

#
# Control syncing to disk
#
# If you're making many changes, then syncing them to disk becomes
# too much of a performance hit. Call hold(), and any changes you make
# will be held in memory, but won't be synced to disk.
pod.hold()  # After calling pod.hold(), changes are only reflected in memory

# Then, calling unhold() later will cause all of those changes to be synced.
pod.unhold()

# A common pattern is:
# 1. call hold()
# 2. make several updates
# 3. call unhold()

# You can manually mark some entry as needing to be synced. Usually
# you don't need this unless you're doing some hacky stuff. Calling this
# will either cause the entry to be synced immediately, or if hold() has
# been called, the entry will be synced as soon as unhold() is called.
pod.update('some-key')

# Remove the example-pod so that the example can be run multiple times
# without raising unintended DuplicateKey exceptions.
shutil.rmtree('example-pod')
# (GitHub page footer omitted: sign-up / sign-in prompt.)