Skip to content

Instantly share code, notes, and snippets.

@drdaeman
Created May 10, 2017 15:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drdaeman/0cbfc5985000711d323597e44940e5f9 to your computer and use it in GitHub Desktop.
Save drdaeman/0cbfc5985000711d323597e44940e5f9 to your computer and use it in GitHub Desktop.
Helpers for django-elasticsearch-dsl
import collections
import itertools
def get_result_models_lazy(results):
    """
    Lazily map search hits to their database model instances.

    ``results`` may be a ``Response`` instance or any other iterable of
    documents. Each document must expose ``meta["doc_type"]``,
    ``meta["id"]`` and a ``get_queryset()`` method.

    The input is consumed one run of consecutive same-type documents at a
    time, and every run costs one ``filter(pk__in=...)`` lookup. Input
    consumption is therefore kept low at the price of extra queries: a
    document type that shows up in several separate runs is queried once
    per run.

    Within a run, instances are yielded in whatever order the filtered
    queryset produces them, not necessarily in hit order.
    """
    def doc_type_of(hit):
        return hit.meta["doc_type"]

    for _, run in itertools.groupby(results, key=doc_type_of):
        ids = []
        model_qs = None
        for doc in run:
            # Every hit of the run shares a doc type, so the first hit is
            # the one asked for the queryset.
            model_qs = doc.get_queryset() if model_qs is None else model_qs
            ids.append(doc.meta["id"])
        # groupby never produces an empty run; this only guards the invariant.
        assert model_qs is not None, "groups cannot be empty"
        yield from model_qs.filter(pk__in=ids)
def get_result_models_eager(results, pk_normalizer=None):
    """
    Map search hits to database model instances with few queries.

    ``results`` may be a ``Response`` instance or any other iterable of
    documents. Each document must expose ``meta["doc_type"]``,
    ``meta["id"]`` and a ``get_queryset()`` method.

    The whole of ``results`` is read first, in a single pass, and then one
    ``filter(pk__in=...)`` lookup is issued per distinct document type.
    Instances are yielded in hit order; hits with no matching instance
    are skipped.

    A hit is paired with an instance when ``pk_normalizer(document id)``
    equals ``pk_normalizer(instance.pk)``. The default normalizer returns
    its argument unchanged, so IDs and PKs of different types (for example
    ``"5"`` and ``5``) never match unless a normalizer reconciles them.
    For compatibility with django-hashid-field, use ``pk_normalizer=str``
    so that both sides are stringified.
    """
    normalize = pk_normalizer if pk_normalizer is not None else (lambda value: value)

    # `results` may not be re-iterable, so the lookup keys are recorded in
    # hit order during the one and only pass.
    ordered_keys = []
    ids_by_type = collections.defaultdict(set)
    queryset_by_type = {}
    for hit in results:
        doc_type = hit.meta["doc_type"]
        if doc_type not in queryset_by_type:
            queryset_by_type[doc_type] = hit.get_queryset()
        raw_id = hit.meta["id"]
        ids_by_type[doc_type].add(raw_id)
        ordered_keys.append((doc_type, normalize(raw_id)))

    # One lookup per document type, indexed by (doc_type, normalized pk).
    found = {}
    for doc_type, queryset in queryset_by_type.items():
        for instance in queryset.filter(pk__in=ids_by_type[doc_type]):
            found[(doc_type, normalize(instance.pk))] = instance

    # Private sentinel so that any stored value counts as "present".
    missing = object()
    for key in ordered_keys:
        instance = found.get(key, missing)
        if instance is not missing:
            yield instance
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment