Skip to content

Instantly share code, notes, and snippets.

@jimklo
Created January 14, 2012 02:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jimklo/1610029 to your computer and use it in GitHub Desktop.
Save jimklo/1610029 to your computer and use it in GitHub Desktop.
PyF Help
import json
import logging

try:  # Python 3 module layout
    from urllib.parse import urlencode, urlparse, urlunparse
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    from urllib import urlencode
    from urllib2 import urlopen
    from urlparse import urlparse, urlunparse

from pyf.componentized import ET
from pyf.componentized.components import Producer
from pyf.componentized.configuration.fields import InputField
from pyf.componentized.configuration.keys import SimpleKey, RepeatedKey,\
    CompoundKey
log = logging.getLogger(__name__)
class LRHarvester(Producer):
    """Producer that harvests records from the node configured as ``node_url``.

    The configured URL is expected to return a JSON document with a
    ``listrecords`` array and, when more pages are available, a
    ``resumption_token``.  The ``resource_data`` of each record is yielded
    to the downstream consumers.
    """

    name = "LR Node Harvester"
    configuration = [
        SimpleKey('node_url', label="Node URL", help_text="http://node01.public.learningregistry.net")
    ]

    def __init__(self, config_node, process_name):
        """Initialise the producer by delegating to ``Producer.__init__``."""
        super(LRHarvester, self).__init__(config_node, process_name)

    def __harvest(self, url=None):
        """Yield the ``resource_data`` of every record, following pagination.

        :param url: URL of the first page to fetch.  When ``None`` the
            ``node_url`` configuration key is used.
        :returns: a generator over the ``resource_data`` of each record.
        :raises KeyError: if a response lacks ``listrecords`` or a record
            lacks ``record`` / ``resource_data``.

        Errors raised while opening the URL or decoding the JSON body are
        propagated unchanged.
        """
        if url is None:
            url = self.get_config_key('node_url')

        # Pages are walked in a loop rather than by recursion so that a long
        # harvest does not nest one generator per page and run into the
        # interpreter's recursion limit.
        while True:
            resp = urlopen(url)
            try:
                data = json.load(resp)
            finally:
                # Release the connection even when the body is not valid JSON.
                resp.close()

            for idx, item in enumerate(data['listrecords']):
                envelope = item['record']['resource_data']
                log.info("First: {0}".format(idx))
                yield envelope

            token = data.get("resumption_token")
            # A missing, null, empty or literal "null" token marks the last page.
            if not token or token == "null":
                return

            # Build the next page URL from the page just fetched: keep scheme,
            # host, path, params and fragment, and replace the query string
            # with the resumption token.
            parts = urlparse(url)
            next_query = urlencode({"resumption_token": token})
            url = urlunparse(
                (parts[0], parts[1], parts[2], parts[3], next_query, parts[5])
            )

    def launch(self, progression_callback=None, message_callback=None, params=None):
        """Start the harvest and return the generator of record payloads.

        :param progression_callback: accepted for interface compatibility;
            not used by this producer.
        :param message_callback: accepted for interface compatibility; not
            used by this producer.
        :param params: accepted for interface compatibility; not used.
        :returns: a generator yielding the ``resource_data`` of each record
            harvested from the configured ``node_url``.
        """
        return self.__harvest()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment