Skip to content

Instantly share code, notes, and snippets.

@rgaudin
Created March 1, 2013 15:16
Show Gist options
  • Save rgaudin/5065294 to your computer and use it in GitHub Desktop.
Save rgaudin/5065294 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: ai ts=4 sts=4 et sw=4 nu
import uuid
import csv
import codecs
import cStringIO
from microsite.formhub import (submit_xml_forms_formhub_raw,
ErrorUploadingDataToFormhub,
ErrorMultipleUploadingDataToFormhub)
class UTF8Recoder:
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    ``f`` is a byte stream and ``encoding`` names the codec its content is
    encoded in.  Each iteration step returns the next line of the stream as
    a UTF-8 encoded byte string.
    """

    def __init__(self, f, encoding):
        # Wrap the raw stream in a decoding reader for the source encoding.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        # Use the builtin next() rather than calling .next() on the reader:
        # it dispatches to whichever spelling of the iterator protocol the
        # reader implements, and propagates StopIteration at end of stream.
        return next(self.reader).encode("utf-8")

    # Same method under the name newer interpreters look up when iterating.
    __next__ = next
class UnicodeReader:
    """
    CSV reader over the file "f", whose content is encoded in the given
    encoding.  Iterating yields one row at a time as a list of unicode
    strings.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # csv.reader is fed UTF-8 byte strings, so recode the input first.
        utf8_lines = UTF8Recoder(f, encoding)
        self.reader = csv.reader(utf8_lines, dialect=dialect, **kwds)

    def __iter__(self):
        return self

    def next(self):
        # Decode every cell of the next parsed row back to unicode.
        decoded = []
        for cell in next(self.reader):
            decoded.append(unicode(cell, "utf-8"))
        return decoded
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
# Server and user name that the two upload endpoints below are built from.
submission_server = 'http://formhub.org'
submission_target_user = 'formhub_r'

# Endpoint passed to the uploader as `bulk_submission_url`.
bulk_submission_url = '%s/%s/bulk-submission' % (
    submission_server,
    submission_target_user,
)
# Endpoint passed to the uploader as `submission_url`.
submission_url = '%s/%s/submission' % (
    submission_server,
    submission_target_user,
)

# Number of rendered forms collected before a batch is submitted.
CHUNK = 100
# Rendered XML submissions waiting for the next upload.
xforms = []
# XML document for a single submission.  It is rendered with
# `submission_template % data`, where `data` is a mapping: every `%(name)s`
# placeholder below must be a key of that mapping (the CSV column names plus
# `uuid`), otherwise the formatting raises KeyError.
# NOTE(review): the <formhub><uuid> value is hard-coded; presumably it
# identifies the target form on the server -- confirm before reusing this
# template for another form.
submission_template = u"""<?xml version="1.0" encoding="UTF-8" ?>
<KenyaPrimarySchool2007 id="KenyaPrimarySchool2007">
<formhub>
<uuid>953cc44a4a3b4d94ba450cd18a6b3167</uuid>
</formhub>
<Name.of.School>%(Name.of.School)s</Name.of.School>
<Code>%(Code)s</Code>
<Level.of.Education>%(Level.of.Education)s</Level.of.Education>
<Status.of.School>%(Status.of.School)s</Status.of.School>
<Sponsor.of.School>%(Sponsor.of.School)s</Sponsor.of.School>
<School.Institution.Type_1>%(School.Institution.Type_1)s</School.Institution.Type_1>
<School.Institution.Type_2>%(School.Institution.Type_2)s</School.Institution.Type_2>
<School.Institution.Type_3>%(School.Institution.Type_3)s</School.Institution.Type_3>
<Pupil.Teacher.Ratio>%(Pupil.Teacher.Ratio)s</Pupil.Teacher.Ratio>
<Pupil.Classroom.Ratio>%(Pupil.Classroom.Ratio)s</Pupil.Classroom.Ratio>
<Pupil.Toilet.Ratio>%(Pupil.Toilet.Ratio)s</Pupil.Toilet.Ratio>
<Total.Number.of.Classrooms>%(Total.Number.of.Classrooms)s</Total.Number.of.Classrooms>
<Boys.Toilets>%(Boys.Toilets)s</Boys.Toilets>
<Girls.Toilets>%(Girls.Toilets)s</Girls.Toilets>
<Teachers.Toilets>%(Teachers.Toilets)s</Teachers.Toilets>
<Total.Toilets>%(Total.Toilets)s</Total.Toilets>
<Total.Boys>%(Total.Boys)s</Total.Boys>
<Total.Girls>%(Total.Girls)s</Total.Girls>
<Total.Enrolment>%(Total.Enrolment)s</Total.Enrolment>
<GOK.TSC.Male>%(GOK.TSC.Male)s</GOK.TSC.Male>
<GOK.TSC.Female>%(GOK.TSC.Female)s</GOK.TSC.Female>
<Local.Authority.Male>%(Local.Authority.Male)s</Local.Authority.Male>
<Local.Authority.Female>%(Local.Authority.Female)s</Local.Authority.Female>
<PTA.BOG.Male>%(PTA.BOG.Male)s</PTA.BOG.Male>
<PTA.BOG.Female>%(PTA.BOG.Female)s</PTA.BOG.Female>
<Others.Male>%(Others.Male)s</Others.Male>
<Others.Female>%(Others.Female)s</Others.Female>
<Non.Teaching.Staff.Male>%(Non.Teaching.Staff.Male)s</Non.Teaching.Staff.Male>
<Non.Teaching.Staff.Female>%(Non.Teaching.Staff.Female)s</Non.Teaching.Staff.Female>
<Province>%(Province)s</Province>
<District>%(District)s</District>
<Division>%(Division)s</Division>
<Location>%(Location)s</Location>
<Costituency>%(Costituency)s</Costituency>
<Geolocation>%(Geolocation)s</Geolocation>
<meta>
<instanceID>uuid:%(uuid)s</instanceID>
</meta>
</KenyaPrimarySchool2007>"""
def cleanup(text):
    """
    Wrap a cell value in a CDATA section so it can be placed in an XML node.

    FH has a problem with "&" inside a node value; CDATA lets such characters
    through without entity escaping.

    The sequence "]]>" cannot appear inside a CDATA section because it is
    the section terminator.  Any occurrence in ``text`` is therefore split
    across two adjacent CDATA sections, which an XML parser reads back as
    the original characters.

    Returns the wrapped value as a unicode string.
    """
    # Format first so non-string values are stringified as before.
    value = u'%s' % text
    # End the section after "]]" and reopen a new one before ">".
    value = value.replace(u']]>', u']]]]><![CDATA[>')
    return u'<![CDATA[%s]]>' % value
def submit(xforms):
    """
    Upload a batch of rendered XML forms in one bulk request.

    ``xforms`` is the list of XML strings to send.  Upload errors raised by
    the formhub helper are printed (message, then details) and not re-raised,
    so the caller carries on with the next batch.
    """
    print("submitting %d forms as bulk" % len(xforms))
    upload_errors = (ErrorUploadingDataToFormhub,
                     ErrorMultipleUploadingDataToFormhub)
    request = dict(
        submission_url=submission_url,
        xforms=xforms,
        as_bulk=True,
        attachments=[],  # no attachments are sent with the forms
        bulk_submission_url=bulk_submission_url,
        timeout=120,
    )
    try:
        submit_xml_forms_formhub_raw(**request)
    except upload_errors as failure:
        print(failure)
        print(failure.details())
# Read the schools CSV, render one XML submission per data row and upload
# the submissions in batches of CHUNK.  A submission is just an XML string.
#
# The file is opened in binary mode because the reader decodes the bytes
# itself, and the `with` block closes it even if a row fails to render.
with open('KenyaPrimarySchools_LightlyCleaned.csv', 'rb') as csv_input:
    headers = None
    count = 0
    for line in UnicodeReader(csv_input):
        if headers is None:
            # The first row holds the column names, which are the keys the
            # template placeholders refer to.
            headers = line
            continue
        data = dict(zip(headers, [cleanup(cell) for cell in line]))
        # Each submission gets its own random instance id.
        data['uuid'] = str(uuid.uuid4())
        xforms.append(submission_template % data)
        count += 1
        if count % CHUNK == 0:
            submit(xforms)
            xforms = []
            print("Uploaded %d forms" % count)
    # Send the trailing batch: when the number of rows is not a multiple of
    # CHUNK the last forms would otherwise never be uploaded.
    if xforms:
        submit(xforms)
        xforms = []
        print("Uploaded %d forms" % count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment