Skip to content

Instantly share code, notes, and snippets.

@rgaudin
Created August 30, 2012 16:47
Show Gist options
  • Save rgaudin/3532885 to your computer and use it in GitHub Desktop.
Save rgaudin/3532885 to your computer and use it in GitHub Desktop.
Script to import data from CSV into a formhub Form
#!/usr/bin/env python
# encoding=utf-8
'''
formhub data importer
Automates the submission of formhub XForms from a CSV file,
possibly an export from another formhub form.
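Launch with the `bulk` argument to trigger bulk submission: all generated
forms are zipped and uploaded in a single network request. Without it,
each form is submitted in its own request.
Tweak the module-level variables and the code to adapt it to your form.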
XForm is generated from a template which is a complete XML file
containing placeholders: `#varname` will be replaced by content of
column `varname` for each row.
Example:
<?xml version='1.0' ?>
<data id="#form_id">
<start>#start</start>
<end>#end</end>
<today>#today</today>
<deviceid/>
<subscriberid/>
<imei/>
<phonenumber/>
</data> '''
import csv
import datetime
import random
import sys
import zipfile

import requests
# --- Input files ------------------------------------------------------
# CSV file to import: its first row names the columns, every following
# row becomes one submission.
DATASOURCE = 'learning_assessments_haiti_may9_2012_08_30.csv'
# Path of the XML template holding the `#varname` placeholders.
FORM_TEMPLATE = 'form_template.xml'

# --- Target form ------------------------------------------------------
FORM_ID = 'LearningAssessment_SADA_Aug16'
_FORMHUB_ACCOUNT_URL = 'http://formhub.org/litreportcards'
FORMHUB_SUBMISSION_URL = _FORMHUB_ACCOUNT_URL + '/submission'
FORMHUB_BULK_SUBMISSION_URL = _FORMHUB_ACCOUNT_URL + '/bulk-submission'
# Name of the ZIP archive built (and uploaded) in bulk mode.
BULK_SUBMISSION_FILE = 'bulk-submission.zip'

# --- Generated values -------------------------------------------------
# strftime format applied to the generated start/end/today fields.
DATE_FMT = u'%Y-%m-%dT%H:%M:%S.%f'
# Pool of teacher identifiers; one is drawn at random for each form.
_TEACHERS_URL = u'http://litreportcards.modilabs.org/teachers/'
TEACHERS_IDS = [
    _TEACHERS_URL + u'f81cf83a0d784590a621b925d4a7e75d',
    _TEACHERS_URL + u'914655942777429e84bbc5f97eaf69c6',
    _TEACHERS_URL + u'ec5e6c9f9b284bc9bf578a5afc7ea06e',
    _TEACHERS_URL + u'99231f8e9b214587a22e12613a5a02fc',
]
def get_random_date():
    '''Return a random `datetime.datetime` (time part 00:00:00).

    The year is drawn from 2010..2012, the month from 1..12 and the day
    from 1..28; a day in that range exists in every month.
    '''
    # Draw in year, month, day order so results are reproducible for a
    # given `random.seed()`.
    year = random.randint(2010, 2012)
    month = random.randint(1, 12)
    day = random.randint(1, 28)
    return datetime.datetime(year, month, day)
def get_random_teacher(teacher_ids=None):
    '''Return one teacher identifier picked at random.

    :param teacher_ids: optional non-empty sequence to pick from; when
        omitted, the module-level `TEACHERS_IDS` list is used.
    :returns: one element of the sequence.
    :raises IndexError: if the sequence is empty.
    '''
    if teacher_ids is None:
        teacher_ids = TEACHERS_IDS
    # random.choice replaces the manual randint(0, len - 1) indexing.
    return random.choice(teacher_ids)
def generate_form(data, form_id):
    '''Build the XML submission text for one row of data.

    Every `#name` placeholder found in the module-level `FORM_TEMPLATE`
    is replaced by the value stored under `name`. `FORM_TEMPLATE` must
    hold the template *content* when this is called (the script entry
    point loads it from disk before calling `main`).

    :param data: mapping of column name to string value for the row.
        It is not modified.
    :param form_id: value substituted for the `#form_id` placeholder.
    :returns: the template text with placeholders substituted.

    The generated fields below override same-named columns of `data`.
    '''
    start = get_random_date()
    end = start + datetime.timedelta(minutes=20)

    # Work on a copy so the caller's dict is left untouched.
    fields = dict(data)
    fields.update({
        'start': start.strftime(DATE_FMT),
        'end': end.strftime(DATE_FMT),
        'today': start.strftime(DATE_FMT),
        'form_id': form_id,
        'teacher_barcode': get_random_teacher(),
        'barcode_fail_reason': '',
        'school_primary': '',
        'school_junior_secondary': '',
        'school_senior_secondary': '',
        'literacy_note': '',
    })

    xml_form = FORM_TEMPLATE
    # Substitute the longest names first: otherwise a short placeholder
    # such as `#end` would also rewrite the beginning of a longer one
    # such as `#end_time` and corrupt it.
    for field in sorted(fields, key=len, reverse=True):
        if not field:
            # An empty column name would yield the placeholder `#` and
            # replace every remaining `#` in the document.
            continue
        xml_form = xml_form.replace(u'#%s' % field, fields[field])
    return xml_form
# HTTP status codes formhub answers with when a submission is accepted.
_SUCCESS_STATUSES = (200, 201, 202)
# Seconds to wait for formhub before giving up on a request.
_REQUEST_TIMEOUT = 60


def _read_rows(datasource_name):
    '''Yield one `{header: value}` dict per data row of the CSV file.'''
    with open(datasource_name) as datasource:
        headers = None
        for row in csv.reader(datasource):
            if not row:
                # blank line: nothing to submit
                continue
            if headers is None:
                # first non-blank row contains headers (column identifiers)
                headers = [header.strip() for header in row]
                continue
            # tag each value with its header; zip() ignores values that
            # have no matching header instead of raising IndexError.
            yield dict(zip(headers, row))


def _post_files(url, files):
    '''POST `files` to `url`; return the response, or None if sending failed.'''
    try:
        return requests.post(url, files=files, timeout=_REQUEST_TIMEOUT)
    except requests.exceptions.Timeout:
        print(u"Unable to send: timeout")
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        # `e.message` is not available on every Python version, so the
        # exception itself is formatted.
        print(u"Unable to send: %r" % (e,))
    return None


def _report(req, failure_fmt, success_msg):
    '''Print the outcome of a submission based on its HTTP status code.'''
    if req.status_code not in _SUCCESS_STATUSES:
        print(failure_fmt % req.text)
    else:
        print(success_msg)


def main(datasource_name, form_id, bulk=False):
    '''Generate one XForm per CSV row and submit them to formhub.

    :param datasource_name: path of the CSV file; its first non-blank
        row holds the column names.
    :param form_id: form identifier passed on to `generate_form`.
    :param bulk: when true, each form is written to `form<N>.xml`, then
        all of them are zipped into `BULK_SUBMISSION_FILE` and uploaded
        in a single request; otherwise each form is posted on its own.

    Network failures are printed and do not raise: in non-bulk mode the
    remaining rows are still processed.
    '''
    row_count = 0
    for row_count, data in enumerate(_read_rows(datasource_name), 1):
        # generate XML form from template
        xml_form = generate_form(data, form_id)
        print('Generated form %d' % row_count)
        form_name = 'form%d.xml' % row_count

        # bulk writes the form to a file and continue
        if bulk:
            with open(form_name, 'w') as xml_file:
                xml_file.write(xml_form)
            continue

        # non-bulk sends the form to formhub directly.
        print(u"Submitting form")
        req = _post_files(FORMHUB_SUBMISSION_URL,
                          {'xml_submission_file': (form_name, xml_form)})
        if req is not None:
            _report(req, u'Unable to submit form: %s', u'Submitted OK')

    if not bulk:
        return

    # bulk builds a ZIP of all previously created XML files then sends
    with zipfile.ZipFile(BULK_SUBMISSION_FILE, 'w') as zfile:
        for num in range(1, row_count + 1):
            zfile.write('form%d.xml' % num)

    print(u"Uploading bulk ZIP file")
    # A ZIP archive is binary data: read it in 'rb' mode, and close the
    # handle once the upload attempt is over.
    with open(BULK_SUBMISSION_FILE, 'rb') as zip_file:
        req = _post_files(FORMHUB_BULK_SUBMISSION_URL,
                          {'zip_submission_file':
                           (BULK_SUBMISSION_FILE, zip_file)})
    if req is not None:
        _report(req, u'Unable to submit ZIP: %s', u'ZIP Submitted OK')
if __name__ == '__main__':
    # generate_form() uses FORM_TEMPLATE as the template text, so the
    # file name held by the constant is swapped for the file's content.
    with open(FORM_TEMPLATE) as template_file:
        FORM_TEMPLATE = template_file.read()

    # bulk mode is requested with a `bulk` command-line argument
    # (case-insensitive, surrounding whitespace ignored).
    bulk = any(arg.lower().strip() == 'bulk' for arg in sys.argv)
    main(DATASOURCE, form_id=FORM_ID, bulk=bulk)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment