Created
January 20, 2017 15:52
-
-
Save rossjones/c525f45bb0cdc935649da7cfcf9f82e7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Converts the VOA Summary non-domestic rates data from a not-quite-CSV
format ('*'-delimited rows) into one huge JSON array.

Usage:
    python convert.py > summary.json < data/uk-englandwales-ndr-2017-summaryvaluations-proposed-epoch-0001-baseline-csv.csv
""" | |
import io | |
import json | |
import sys | |
def generate_record(row, titles):
    """Pair each cell of *row* with the title at the same position.

    Args:
        row: Iterable of cell values.
        titles: Indexable collection of column names; ``titles[i]`` labels
            the i-th cell of *row*.

    Returns:
        A dict mapping title -> cell.  Titles with no matching cell
        (row shorter than titles) are simply absent from the result.

    Raises:
        IndexError: If *row* contains more cells than there are titles.
    """
    return {titles[index]: cell for index, cell in enumerate(row)}
# Column names for the cells of a '01' row, in the order the cells appear
# after the leading record-type id.
type01_titles = [
    'assessment_reference',
    'uarn',
    'ba_code',
    'firm_name',
    'number_or_name',
    'sub_street_3',
    'sub_street_2',
    'sub_street_1',
    'street',
    'town',
    'postal_district',
    'county',
    'postcode',
    'scheme_ref',
    'primary_description',
    'total_area',
    'subtotal',
    'total_value',
    'adopted_rv',
    'list_year',
    'ba_name',
    'ba_reference_number',
    'vo_ref',
    'from_date',
    'to_date',
    'scat_code_only',
    'unit_of_measurement',
    'unadjusted_price',
]
# Column names for the cells of '02'..'07' rows, in the order the cells
# appear after the leading record-type id.  Each list becomes the set of
# keys of the dict built for a row of that type.
type02_titles = ['line', 'floor', 'description', 'area', 'price', 'value']
# NOTE: 'size' previously had a stray leading space (' size'), which leaked
# into the emitted JSON as the key " size".
type03_titles = ['other_oa_description', 'size', 'price', 'value']
type04_titles = ['pm_value']
type05_titles = ['spaces', 'spaces_value', 'area', 'area_value', 'total']
type06_titles = ['description', 'percent']
type07_titles = ['total_before', 'total_adjustment']
# Dispatch table keyed by record-type id (the first '*'-separated field of
# a row).  Each value is a pair:
#   (key under which the row is stored in the output record,
#    column titles used to label the row's remaining cells)
processors = {
    '01': ('details', type01_titles),
    '02': ('line_items', type02_titles),
    '03': ('additional', type03_titles),
    '04': ('plant_machinery', type04_titles),
    '05': ('carpark', type05_titles),
    '06': ('adjustments', type06_titles),
    '07': ('adjustment_totals', type07_titles),
}
def new_record():
    """Return a fresh, empty output record.

    The three repeatable sections start as new empty lists; the four
    single-valued sections start as ``None``.  Keys are inserted in a fixed
    order, which is also the order they appear in when the record is
    serialised with ``json.dumps``.
    """
    record = {}
    # Sections that collect one entry per matching row.
    for list_key in ('line_items', 'additional', 'adjustments'):
        record[list_key] = []
    # Sections that hold at most one row's data.
    for single_key in ('plant_machinery', 'details', 'carpark',
                       'adjustment_totals'):
        record[single_key] = None
    return record
def process(row_feeder):
    """Group '*'-delimited rows into one output record per '01' row.

    Each row's first field is a record-type id that is looked up in
    ``processors`` to find the output key and the column titles for the
    remaining cells.  Rows of type '02', '03' and '06' are appended to the
    record's lists; every other type overwrites the record's single slot.
    A '01' row starts a new record once the current one already has
    details.

    Args:
        row_feeder: Iterable of text lines (for example an open text file).

    Yields:
        One dict per record, shaped like the result of ``new_record()``.
        Nothing is yielded when the input contains no non-blank rows.

    Raises:
        KeyError: If a row's record-type id is not in ``processors``.
        IndexError: If a row has more cells than its type has titles.
    """
    # Record types that may occur several times within one record.
    repeatable_types = ('02', '03', '06')

    current_record = new_record()
    # A fresh record is a non-empty dict and therefore always truthy, so
    # track explicitly whether any row was consumed.
    has_rows = False

    for row in row_feeder:
        stripped = row.strip()
        if not stripped:
            # Blank lines (e.g. a trailing newline at the end of the input)
            # carry no record-type id; skip them instead of failing the
            # lookup on ''.
            continue

        split_row = stripped.split('*')
        id_field = split_row[0]

        # Get the name of this row and titles to use
        name, title = processors[id_field]

        # A '01' row opens a new record: if the current record already has
        # its details, it is complete and can be handed to the caller.
        if id_field == '01' and current_record.get('details'):
            yield current_record
            current_record = new_record()

        data = generate_record(split_row[1:], title)
        if id_field in repeatable_types:
            current_record[name].append(data)
        else:
            current_record[name] = data
        has_rows = True

    # Emit the trailing record, but only if the input contained any rows;
    # otherwise empty input would produce one bogus all-empty record.
    if has_rows:
        yield current_record
if __name__ == '__main__':
    # Stream a JSON array to stdout one record at a time, so the whole
    # result never has to be held in memory.
    emit = sys.stdout.write
    emit("[")
    # 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if the
    # input has one.
    with io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8-sig') as stream:
        for index, record in enumerate(process(stream)):
            if index:
                # Separator goes before every record except the first.
                emit(",")
            emit(json.dumps(record))
    emit("]\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment