Skip to content

Instantly share code, notes, and snippets.

@snopoke
Created June 26, 2017 13:11
Show Gist options
  • Save snopoke/8ddddf959beb08266cd6919ac239f660 to your computer and use it in GitHub Desktop.
Save snopoke/8ddddf959beb08266cd6919ac239f660 to your computer and use it in GitHub Desktop.
Min Form Date from Location
from dimagi.utils.chunked import chunked
from corehq.apps.locations.dbaccessors import user_ids_at_locations
from corehq.apps.es.aggregations import *
from corehq.apps.es import FormES
from corehq.apps.locations.models import *
from elasticsearch.exceptions import *
from iso8601 import iso8601
# Script configuration: edit these three values before running.
domain = 'domain'
loc_name = 'location' # location to get first form from
leaf_type_name = 'leaf_loc_type' # assumes all forms submitted from locations of this type

# Named ``leaf_type`` rather than ``type`` so the builtin ``type`` is not
# shadowed for the rest of the session.
# NOTE: ``[0]`` raises IndexError when nothing matches the configured names.
leaf_type = LocationType.objects.filter(domain=domain, name=leaf_type_name)[0]
root_loc = SQLLocation.objects.filter(domain=domain, name=loc_name)[0]

# Location ids under (and including) the root location, restricted to the
# leaf location type.
loc_ids = list(
    SQLLocation.objects
    .get_locations_and_children([root_loc.location_id])
    .filter(location_type=leaf_type)
    .values_list('location_id', flat=True)
)
user_ids = user_ids_at_locations(loc_ids)
print("Querying for {} users".format(len(user_ids)))
def _first_form_buckets(chunk, before=None):
    """Run the per-user "first form" aggregation for one chunk of user ids.

    :param chunk: list of user ids to include in the query.
    :param before: optional datetime; when given, only forms submitted
        before it are considered (via ``submitted(lt=...)``).
    :return: the ``user`` terms-aggregation bucket list; each bucket carries
        a ``first_form`` top-hits result of size 1 on ``received_on``
        (presumably the earliest form per user -- confirm sort order).
    """
    query = (
        FormES()
        .domain(domain)
        .aggregation(
            TermsAggregation('user', 'form.meta.userID').aggregation(
                TopHitsAggregation(
                    'first_form', field='received_on', size=1, include='received_on'
                )
            )
        )
        .user_id(chunk)
    )
    if before is not None:
        query = query.submitted(lt=before)
    # Run the query exactly once and reuse the result.
    return query.run().aggregations.user.buckets_list


def _get_min_date(user_ids, current_min=None, max_attempts=3):
    """Find the earliest form received from any of the given users.

    Users are queried in chunks of 100. Once a candidate minimum is known,
    later queries are restricted to forms submitted before it.

    :param user_ids: iterable of user ids to search.
    :param current_min: optional ``(form_id, received_on)`` tuple to start
        from, e.g. the result of a previous partial run.
    :param max_attempts: how many times a chunk is queried before it is
        skipped when Elasticsearch keeps timing out.
    :return: ``(form_id, received_on)`` for the earliest form found, with
        ``received_on`` as a naive datetime, or ``current_min`` (``None`` by
        default) when no earlier form was found.
    """
    min_date = current_min
    for chunk in chunked(user_ids, 100):
        chunk = list(chunk)

        # Retry the *same* chunk on timeout so its users are not silently
        # dropped from the search.
        buckets = None
        for attempt in range(1, max_attempts + 1):
            try:
                buckets = _first_form_buckets(
                    chunk, before=min_date[1] if min_date else None
                )
                break
            except ConnectionTimeout:
                print("Timeout querying {} users (attempt {} of {})".format(
                    len(chunk), attempt, max_attempts))

        if buckets is None:
            # Best effort: keep going, but make the gap in the result visible.
            print("WARNING: skipped {} users after repeated timeouts; "
                  "result may be incomplete".format(len(chunk)))
            continue

        for user in buckets:
            # Drop tzinfo so the value compares with other naive datetimes.
            rc = iso8601.parse_date(
                user.first_form.hits[0]['received_on']
            ).replace(tzinfo=None)
            if not min_date or rc < min_date[1]:
                min_date = (user.first_form.doc_ids[0], rc)
                print("Current min: {}".format(min_date[1].isoformat()))
    return min_date
# Run the search and report the result.
min_date = _get_min_date(user_ids)
if min_date is None:
    # No forms were found for any user, so there is no tuple to unpack.
    print('No forms found for location "{}"'.format(loc_name))
else:
    # CSV-style row: location name, form id, received_on (ISO 8601).
    # The original separated the last two fields with "." instead of ",".
    print('"{}","{}","{}"'.format(loc_name, min_date[0], min_date[1].isoformat()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment