Skip to content

Instantly share code, notes, and snippets.

@snopoke
Created December 13, 2019 11:32
Show Gist options
  • Save snopoke/6e862fc16320ade2eb6548d1ba519a69 to your computer and use it in GitHub Desktop.
Save snopoke/6e862fc16320ade2eb6548d1ba519a69 to your computer and use it in GitHub Desktop.
import csv
import gzip
import os
import shutil
from collections import Counter
from datetime import datetime, timedelta

from elasticsearch.exceptions import ConnectionTimeout

from corehq.apps.es import FormES
from corehq.util.timezones.utils import parse_date
# Dump, hour by hour, the completion and receipt timestamps of 'icds-cas'
# forms into one gzipped CSV per calendar day, and write a per-day histogram
# of the whole-day gap between receipt and completion.
path = '/home/cchq/form_dates'


def _write_day_summary(day_label, summary):
    """Write the ``days_diff -> form_count`` histogram for one day.

    ``day_label`` is the ``YYYY-MM-DD`` string of the day the counts in
    ``summary`` were collected for; it determines the output filename.
    """
    summary_filename = f"{day_label}_summary.csv"
    print(f' Writing summary {summary_filename}')
    # newline='' is what the csv module asks for on files it writes to.
    with open(os.path.join(path, summary_filename), 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['days_diff', 'form_count'])
        writer.writerows(sorted(summary.items()))


# Always start from an empty output directory.
try:
    shutil.rmtree(path)
except FileNotFoundError:
    pass
os.makedirs(path)

gstart = datetime.utcnow()
start = datetime(2019, 10, 1)
# NOTE(review): the original script never defined `end`, so the loop below
# raised NameError. Defaulting to the moment the run started (scan everything
# up to "now"); replace with a fixed cutoff if that was the intent.
end = gstart
tc = 0  # total number of forms exported so far
day_summary = Counter()  # days_diff -> number of forms, for `summary_day`
summary_day = None  # 'YYYY-MM-DD' label of the day `day_summary` describes

while start < end:
    day_label = start.strftime('%Y-%m-%d')
    all_data_filename = f"{day_label}.csv.gz"
    all_data_headers = []
    if not os.path.isfile(os.path.join(path, all_data_filename)):
        # No data file for this day yet, so this is the first batch of a new
        # day: the file needs a header row, and any counts accumulated so far
        # belong to the previous day and must be flushed under *its* date.
        all_data_headers = ['form_id', 'completed_on', 'received_on', 'days_diff']
        if day_summary:
            _write_day_summary(summary_day, day_summary)
            day_summary = Counter()

    count = 0
    se = start + timedelta(hours=1)  # exclusive end of this one-hour batch
    rows = []
    try:
        forms = (
            FormES()
            .domain('icds-cas')
            .completed(gte=start, lt=se)
            .source(['form.meta.timeEnd', 'received_on'])
            .scroll()
        )
        for form in forms:
            count += 1
            completed_on = form['form']['meta']['timeEnd']
            received_on = form['received_on']
            days_diff = parse_date(received_on).date() - parse_date(completed_on).date()
            rows.append([form['_id'], completed_on, received_on, days_diff.days])
    except ConnectionTimeout:
        # `start` is not advanced, so the same hour is queried again; `rows`
        # and `count` are rebuilt from scratch, so nothing is written twice.
        # NOTE(review): this retries forever with no back-off.
        print(f' retrying batch: {start} to {se}')
        continue

    # Append mode: every hourly batch of a day lands in the same file.
    with gzip.open(os.path.join(path, all_data_filename), 'at', newline='') as f:
        writer = csv.writer(f)
        if all_data_headers:
            writer.writerow(all_data_headers)
        writer.writerows(rows)

    day_summary.update(row[3] for row in rows)
    summary_day = day_label
    tc += count
    print(f'[{datetime.utcnow()}] Runtime: {datetime.utcnow() - gstart}, Progress: {start} to {se}: {count} ({tc})')
    start = se

# The loop only flushes a summary when the *next* day begins, so the last day
# processed still has to be written out here.
if day_summary:
    _write_day_summary(summary_day, day_summary)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment