Skip to content

Instantly share code, notes, and snippets.

@jacobg
Last active August 29, 2015 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jacobg/6a78a8f90e44c3a8993c to your computer and use it in GitHub Desktop.
Save jacobg/6a78a8f90e44c3a8993c to your computer and use it in GitHub Desktop.
Snippets showing how to batch entity saves in a MapReduce job with djangoappengine
class SQLInsertCompiler(NonrelInsertCompiler, SQLCompiler):
    """Insert compiler that can hand a datastore write to the caller as a batch op.

    A model instance that carries a ``batch_op_class`` attribute and already
    has an ``id`` is not written here; the operation wrapping its datastore
    entity is stored on the instance as ``batch_op`` for the caller to run.
    Every other instance is written with a single ``Put`` call.
    """

    @safe_call
    def insert(self, data_list, return_id=False):
        """Build one datastore entity per row and either write or defer it.

        Args:
            data_list: sequence of ``{column: value}`` mappings. Row ``i`` is
                matched with ``self.query.objs[i]``.
            return_id: accepted for interface compatibility; not consulted
                in this method.

        Returns:
            ``keys[0]`` when ``keys`` is a list, otherwise ``keys`` itself,
            where ``keys`` is the result of ``Put`` if at least one entity
            was written directly, else a one-element list holding the key
            of the last deferred entity. ``None`` when ``data_list`` is
            empty.
        """
        opts = self.query.get_meta()
        unindexed_fields = get_model_indexes(self.query.model)['unindexed']
        unindexed_cols = [opts.get_field(name).column
                          for name in unindexed_fields]

        # Pre-bind the result so an empty data_list returns None instead of
        # failing on an unbound local at the return statement.
        keys = None
        entity_list = []
        for i, data in enumerate(data_list):
            properties = {}
            kwds = {'unindexed_properties': unindexed_cols}
            for column, value in data.items():
                # The value will already be a db.Key, but the Entity
                # constructor takes a name or id of the key, and will
                # automatically create a new key if neither is given.
                if column == opts.pk.column:
                    if value is not None:
                        kwds['id'] = value.id()
                        kwds['name'] = value.name()

                # GAE does not store empty lists (and even does not allow
                # passing empty lists to Entity.update) so skip them.
                elif isinstance(value, (tuple, list)) and not value:
                    continue

                # Use column names as property names.
                else:
                    properties[column] = value

            entity = Entity(opts.db_table, **kwds)
            entity.update(properties)

            # The Django model instance this row was generated from.
            d_entity = self.query.objs[i]
            batch_op_class = getattr(d_entity, 'batch_op_class', None)
            # Batch op is only supported for an existing entity (i.e. an
            # update); a new entity is always written directly below.
            # NOTE(review): relies on the model exposing ``id`` -- confirm
            # for models with a custom primary key name.
            if d_entity.id and batch_op_class:
                # Save the op on the Django model object so the caller can
                # execute it later.
                d_entity.batch_op = batch_op_class(entity)
                keys = [entity.key()]
            else:
                entity_list.append(entity)

        # Skip the datastore round trip when every row was deferred.
        if entity_list:
            keys = Put(entity_list)
        return keys[0] if isinstance(keys, list) else keys
from mapreduce import context, operation as op
def _batch_save(entity):
    """Save a Django model instance, deferring its datastore write to a batch op.

    The instance is temporarily tagged with ``batch_op_class = op.db.Put``
    and ``save()`` is called. For an existing entity the insert compiler
    stores the resulting operation on ``entity.batch_op`` instead of writing
    to the datastore.

    Args:
        entity: the Django model instance to save.

    Returns:
        The operation the compiler stored on ``entity.batch_op``, or ``None``
        if ``save()`` did not store one (the compiler only defers entities
        that already have an id; others are written immediately).
    """
    # Reset first so an op left over from an earlier call is never returned.
    entity.batch_op = None
    entity.batch_op_class = op.db.Put
    try:
        entity.save()
    finally:
        # Remove the marker so a later plain entity.save() writes to the
        # datastore instead of being silently deferred again.
        del entity.batch_op_class
    return entity.batch_op
def save_entity_mapper(entity):
    """Mapper that saves *entity* through a batched datastore operation.

    Instead of a naive single ``entity.save()``, the save is routed through
    ``_batch_save`` and the operation it returns is yielded for the
    MapReduce framework to execute.

    Args:
        entity: the Django model instance handed to the mapper.

    Yields:
        The operation returned by ``_batch_save``. Nothing is yielded when
        that result is ``None``.
    """
    batch_op = _batch_save(entity)
    # Only yield a real operation: any other value yielded from a mapper is
    # treated by the framework as an output record.
    if batch_op is not None:
        yield batch_op
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment