Skip to content

Instantly share code, notes, and snippets.

@bertday
Last active August 29, 2015 14:20
Show Gist options
  • Save bertday/f8dd28de125007c9efab to your computer and use it in GitHub Desktop.
Save bertday/f8dd28de125007c9efab to your computer and use it in GitHub Desktop.
Convert Parking Ticket Dump to CSV
import csv
import sys
'''
CONFIG
'''
# Name of the fixed-width input file to convert.
IN_FILE_NAME = 'PH_CITY_EXTRACT_2014'

# Record layout: one dict per column, listed in the order the columns appear
# in each input line. 'length' is the column width in characters; the same
# order is used for the columns of the CSV output.
FIELDS = [
    {'name': column_name, 'length': column_width}
    for column_name, column_width in (
        ('ticket', 11),
        ('issue_date', 10),
        ('issue_time', 5),
        ('state', 2),
        ('plate', 8),
        ('division', 4),
        ('location', 35),
        ('violation', 23),
        ('fine', 6),
        ('issuing_agency', 6),
    )
]
'''
MAIN
'''
# Compute start/stop indexes.
# Fields are laid out back to back: the first starts at column 0 and every
# later field starts where the previous one stopped. 'start'/'stop' are stored
# on each field dict as half-open slice bounds for use when cutting up a row.
offset = 0
for field in FIELDS:
    field['start'] = offset
    offset += field['length']
    field['stop'] = offset
# Set up files
out_file_name = '{}.csv'.format(IN_FILE_NAME)
# The csv module needs a binary-mode file on Python 2, but a text-mode file
# opened with newline='' on Python 3 (otherwise writerow() raises TypeError,
# or extra blank lines appear on platforms that translate '\n').
if sys.version_info[0] >= 3:
    out_mode, out_kwargs = 'w', {'newline': ''}
else:
    out_mode, out_kwargs = 'wb', {}
# The with-block closes (and therefore flushes) both files, even if a row
# fails part-way through the conversion.
with open(IN_FILE_NAME) as in_file, \
        open(out_file_name, out_mode, **out_kwargs) as out_file:
    out_writer = csv.writer(out_file, quoting=csv.QUOTE_ALL)
    # Write headers
    out_writer.writerow([field['name'] for field in FIELDS])
    # Loop over rows, streaming the input one line at a time instead of
    # loading the whole dump into memory with readlines().
    for i, row in enumerate(in_file):
        # Progress indicator: print the row index every 100,000 rows
        if i % 100000 == 0:
            print(i)
        # Remove only the line terminator. An unconditional row[:-1] would
        # drop a data character from a final line with no trailing newline.
        row = row.rstrip('\r\n')
        # Cut each fixed-width field out of the row using the precomputed
        # 'start'/'stop' bounds and trim its surrounding whitespace.
        out_writer.writerow(
            [row[field['start']:field['stop']].strip() for field in FIELDS]
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment