Skip to content

Instantly share code, notes, and snippets.

@mcrumm
Created May 28, 2016 00:30
Show Gist options
  • Save mcrumm/3d80f558d93658f1d0533caa3c6987aa to your computer and use it in GitHub Desktop.
Save mcrumm/3d80f558d93658f1d0533caa3c6987aa to your computer and use it in GitHub Desktop.
python collection pipeline example
def mapHits(doc):
    """Flatten one search hit into a ``{'name', 'place'}`` record.

    Args:
        doc: Mapping with a ``'_source'`` entry. The source holds a
            ``'name'`` and, optionally, a list of ``'occurrences'`` whose
            items each carry a ``'place'``.

    Returns:
        A dict with the hit's name and the place of its first occurrence,
        or ``None`` when the hit has no occurrences (missing or empty list).

    Raises:
        KeyError: If ``'_source'``, ``'name'``, or the first occurrence's
            ``'place'`` is absent.
    """
    source = doc['_source']
    # A missing 'occurrences' key is treated the same as an empty list so
    # that sparse hits are skipped instead of aborting the whole pipeline.
    occurrences = source.get('occurrences')
    if not occurrences:
        return None
    # Only the first occurrence contributes to the record.
    return {
        'name': source['name'],
        'place': occurrences[0]['place'],
    }
def reduction(acc, i):
    """Fold one mapped record into the column-oriented accumulator.

    Args:
        acc: Dict whose ``'name'`` and ``'place'`` values are lists.
            It is mutated in place.
        i: Record with ``'name'`` and ``'place'`` entries.

    Returns:
        The same ``acc`` object, so the function can be passed to ``reduce``.
    """
    acc['name'].append(i['name'])
    acc['place'].append(i['place'])
    return acc
# `reduce` is a builtin only on Python 2; functools provides it on 2.6+ and 3.x.
from functools import reduce

# Sample search hits. The last one has no occurrences, so the pipeline
# below drops it.
hits = [
    {
        '_source': {
            'name': 'Foo Fighters',
            'occurrences': [
                {
                    'place': 'The Venue'
                }
            ]
        }
    },
    {
        '_source': {
            'name': '311',
            'occurrences': [
                {
                    'place': 'The Ballroom'
                }
            ]
        }
    },
    {
        '_source': {
            'name': 'Nickelback',
            'occurrences': []
        }
    }
]

# Column-oriented accumulator that the reduce step fills in place.
# `reduction` only appends to 'name' and 'place'; 'description' stays empty.
data = {
    'name': [],
    'description': [],
    'place': []
}

# Collection pipeline: map each hit to a flat record, drop the hits that
# mapped to None, then fold the survivors into `data`.
mapped = map(mapHits, hits)
filtered = filter(None, mapped)
reduce(reduction, filtered, data)

# Parenthesized form prints the same text on Python 2 and Python 3.
print(data)
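# Hypothetical fluent form of the same pipeline (illustration only;
# Python lists have no .map/.filter/.reduce methods):
# ds = hits \
#     .map(mapHits) \
#     .filter() \
#     .reduce(reduction, data)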
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment