Skip to content

Instantly share code, notes, and snippets.

@peio
Created July 9, 2012 19:20
Show Gist options
  • Save peio/3078334 to your computer and use it in GitHub Desktop.
Save peio/3078334 to your computer and use it in GitHub Desktop.
Convert XML BG Gov spending to csv
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
from lxml import etree
import ucsv as csv
import codecs
# Convert one government-spending XML report into a flat CSV file with one
# row per (organization, payment) pair.

# Read the whole report as text; the context manager closes the file even
# if reading fails.
with codecs.open("Data/10950.xml", 'r', 'utf-8') as f:
    xml_contents = f.read()

# Parse the document; the 'SpendingBreakdown' element is looked up below.
xml_search = etree.fromstring(xml_contents)

# HACK: the report date is read from a fixed character offset in the raw
# text (presumably formatted DD.MM.YYYY -- confirm against the input file)
# and its three dot-separated parts are reversed and joined with '-'.
# This breaks as soon as the length of the file header changes.
raw_date = xml_contents[44:54]
date_parts = raw_date.split('.')
if len(date_parts) != 3:
    raise ValueError(
        "Expected three dot-separated date parts at offset 44, got %r"
        % raw_date)
Date = date_parts[2] + '-' + date_parts[1] + '-' + date_parts[0]

# 'wb' is the mode the Python 2 csv machinery expects; the with-block makes
# sure the output is flushed and closed when the export finishes or fails.
with open('Data/GovSpending-' + Date + '.csv', 'wb') as out_file:
    cvsf = csv.writer(out_file, quoting=csv.QUOTE_MINIMAL)

    # CSV header row (column names are part of the output format).
    cvsf.writerow(['Date', 'Organization', 'OrganizationCode', 'PaymentsID',
                   "PaymentsCode", "PaymentsPurpose", "PaymentsCount",
                   "PaymentsAmmount"])

    # Each child of the first 'SpendingBreakdown' element is one spending
    # organization; each of its children is one payment line.
    for SpendingEntity in xml_search.xpath('SpendingBreakdown')[0]:
        Organization = SpendingEntity.attrib["name"]
        OrganizationCode = SpendingEntity.attrib["code"].replace("*", '')

        for Payment in SpendingEntity:
            PaymentsPurpose = Payment.attrib["name"]
            PaymentsCount = Payment.attrib["count"]
            PaymentsAmmount = Payment.attrib["amount"]

            # Payments without a 'code' attribute fall back to '00'
            # (placeholder chosen by the original author); only the missing
            # attribute is handled, other errors are no longer swallowed.
            raw_code = Payment.attrib.get("code")
            if raw_code is None:
                PaymentsCode = '00'
            else:
                PaymentsCode = raw_code.replace(" xxxx", '')

            # Row identifier built from date, organization and payment code.
            PaymentsID = Date + '-' + OrganizationCode + '-' + PaymentsCode
            print(PaymentsID)
            cvsf.writerow([Date, Organization, OrganizationCode, PaymentsID,
                           PaymentsCode, PaymentsPurpose, PaymentsCount,
                           PaymentsAmmount])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment