Skip to content

Instantly share code, notes, and snippets.

@yohanboniface
Created January 16, 2015 20:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yohanboniface/b5005437d383a0e031a6 to your computer and use it in GitHub Desktop.
Save yohanboniface/b5005437d383a0e031a6 to your computer and use it in GitHub Desktop.
import csv
import json
import time
from addok.import_utils import FIELDS
def main(filepath, destination, limit=None):
    """Group pipe-delimited address rows into one JSON document per id.

    Reads ``filepath`` as a ``|``-delimited CSV whose column names come from
    ``FIELDS`` and writes one JSON object per line to ``destination``.

    The id of a row is the part of its ``source_id`` before the first ``-``.
    Only *consecutive* rows sharing an id are merged: the first row of a run
    provides the document's fields (minus ``source_id`` and ``source``, plus
    ``id``), a ``'number'`` type is rewritten to ``'street'``, and every
    non-empty ``housenumber`` of the run is collected under ``housenumbers``
    with its ``lat``/``lon``.

    Args:
        filepath: Path of the CSV file to read.
        destination: Path of the line-delimited JSON file to write; it is
            opened in ``'w'`` mode, so existing content is replaced.
        limit: Accepted for interface compatibility; currently ignored.
    """
    print('Processing', filepath)
    start = time.time()
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation requires for file objects passed to a reader.
    with open(destination, mode='w') as df, open(filepath, newline='') as f:
        reader = csv.DictReader(f, fieldnames=FIELDS, delimiter='|')
        previous = None
        doc = None
        for count, row in enumerate(reader, start=1):
            _id = row['source_id'].split('-')[0]
            if not previous or _id != previous:
                # A new id starts: flush the document built so far and begin
                # a fresh one from this row.
                previous = _id
                if doc:
                    df.write(json.dumps(doc) + '\n')
                doc = row.copy()
                del doc['source_id']
                del doc['source']
                doc['id'] = _id
                if doc['type'] == 'number':
                    doc['type'] = 'street'
            if row['housenumber']:
                doc.setdefault('housenumbers', {})[row['housenumber']] = {
                    'lat': row['lat'],
                    'lon': row['lon'],
                }
            # The per-row house number now lives under 'housenumbers'.
            if doc.get('housenumber'):
                del doc['housenumber']
            if count % 10000 == 0:
                print("Done", count, time.time() - start)
        # The loop only flushes when the *next* id shows up, so the last
        # document must be written explicitly or it would be lost.
        if doc:
            df.write(json.dumps(doc) + '\n')
    print('Done in', time.time() - start)
if __name__ == '__main__':
    # Script entry point: convert the default input file into its
    # line-delimited JSON counterpart.
    main(filepath='full.csv', destination='full.sjson')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment