Skip to content

Instantly share code, notes, and snippets.

@ttomasz
Last active March 20, 2023 17:26
Show Gist options
  • Save ttomasz/661d5ec910744cb778c222a949413b6d to your computer and use it in GitHub Desktop.
Save ttomasz/661d5ec910744cb778c222a949413b6d to your computer and use it in GitHub Desktop.
Convert multiline (regular) GeoJSON to line-delimited
import sys
import datetime
import time
from pathlib import Path
# ijson and jsonlines are third-party packages. When one is missing, print a
# pip install hint and then re-raise so the original ModuleNotFoundError
# (and its traceback) is still reported.
try:
    import ijson
except ModuleNotFoundError:
    print("Module ijson not found. Run: `pip install ijson`")
    raise
try:
    import jsonlines
except ModuleNotFoundError:
    print("Module jsonlines not found. Run: `pip install jsonlines`")
    raise
def curent_timestamp() -> str:
    """Return the current local date and time as an ISO 8601 string.

    A single space (rather than the default ``T``) separates the date
    part from the time part.
    """
    now = datetime.datetime.now()
    return now.isoformat(sep=" ")
if __name__ == '__main__':
    # Usage: python convert.py <input.geojson> <output.geojsonl>
    # Streams every item of the top-level "features" array from the input
    # file and writes each feature that has a truthy "geometry" as one line
    # of the output file; features without geometry are counted and dropped.
    if len(sys.argv) != 3:
        print("Number of parameters unexpected. You should run the script like: "
              "python convert.py input/path.geojson output/path.geojsonl")
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module and may be missing (e.g. python -S).
        sys.exit(1)

    input_path_object = Path(sys.argv[1]).absolute().resolve()
    input_path = input_path_object.as_posix()
    if not input_path_object.exists():
        raise FileNotFoundError(f"{sys.argv[1]} (resolved to: {input_path}) - not found.")
    output_path = Path(sys.argv[2]).absolute().as_posix()
    print(f"{input_path=}")  # = suffix shows variable name
    print(f"{output_path=}")

    num_rows = 0  # features read from the input so far
    num_rows_written = 0  # features actually written to the output
    num_rows_missing_geometry = 0  # features skipped (no/empty geometry)
    print(f"{ijson.backend=}")

    # Open the input in binary mode: ijson parses bytes, and a text-mode
    # handle would decode with the platform default encoding first.
    with open(input_path, "rb") as f, jsonlines.open(output_path, mode="w") as writer:
        print(f"{curent_timestamp()} - starting processing...")
        time_start = time.perf_counter()
        features = ijson.items(f, "features.item", use_float=True)
        for num_rows, feature in enumerate(features, start=1):
            if feature.get("geometry"):
                writer.write(feature)
                num_rows_written += 1
            else:
                num_rows_missing_geometry += 1
            # Progress report every 100 000 features read.
            if num_rows % 100000 == 0:
                print(f"{curent_timestamp()} - rows processed so far: {num_rows:_d}")

    time_end = time.perf_counter()
    duration = datetime.timedelta(seconds=time_end - time_start)
    # Report the number of rows written, not the number read: dropped rows
    # are listed separately at the end of the message.
    print(f"{curent_timestamp()} - processing finished. Total rows written to new file: {num_rows_written:_d}. "
          f"Time: {duration}. Number of dropped rows due to lack of geometry: {num_rows_missing_geometry:_d}.")
Display the source blob
Display the rendered blob
Raw
{
"type" : "FeatureCollection",
"features" : [ {
"type" : "Feature",
"properties" : {
"TRIP_ID" : 1373017604620000351,
"CALL_TYPE" : "C",
"ORIGIN_CALL" : "",
"ORIGIN_STAND" : "",
"TAXI_ID" : 20000351,
"TIMESTAMP" : "java.util.GregorianCalendar[time=1373017604,areFieldsSet=true,areAllFieldsSet=true,lenient=true,zone=java.util.SimpleTimeZone[id=UTC,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=1970,MONTH=0,WEEK_OF_YEAR=3,WEEK_OF_MONTH=3,DAY_OF_MONTH=16,DAY_OF_YEAR=16,DAY_OF_WEEK=6,DAY_OF_WEEK_IN_MONTH=3,AM_PM=1,HOUR=9,HOUR_OF_DAY=21,MINUTE=23,SECOND=37,MILLISECOND=604,ZONE_OFFSET=0,DST_OFFSET=0]",
"DAY_TYPE" : "A",
"MISSING_DATA" : false
},
"geometry" : {
"type" : "MultiPoint",
"coordinates" : [ [ -8.669088, 41.235228 ], [ -8.669142, 41.236434 ], [ -8.668863, 41.238369 ], [ -8.667819, 41.236128 ], [ -8.666748, 41.232897 ], [ -8.668368, 41.228892 ], [ -8.671149, 41.226687 ], [ -8.675262, 41.228415 ], [ -8.679879, 41.230557 ], [ -8.685954, 41.232042 ], [ -8.692443, 41.232438 ], [ -8.695629, 41.234238 ], [ -8.695836, 41.238441 ], [ -8.694333, 41.243013 ], [ -8.695107, 41.247549 ], [ -8.69733, 41.250672 ], [ -8.700489, 41.254596 ], [ -8.701713, 41.259483 ], [ -8.701533, 41.264667 ], [ -8.7021, 41.270004 ], [ -8.700444, 41.275503 ], [ -8.698941, 41.280867 ], [ -8.701434, 41.285718 ], [ -8.704773, 41.290461 ], [ -8.706357, 41.295483 ], [ -8.702523, 41.299884 ], [ -8.697591, 41.303547 ], [ -8.697393, 41.308434 ], [ -8.700894, 41.313186 ], [ -8.704386, 41.31792 ], [ -8.7075, 41.322843 ], [ -8.708661, 41.328252 ], [ -8.711145, 41.333598 ], [ -8.712432, 41.339115 ], [ -8.712666, 41.34357 ], [ -8.712027, 41.346648 ], [ -8.712117, 41.350698 ], [ -8.713062, 41.356035 ], [ -8.717904, 41.360796 ], [ -8.722737, 41.365404 ], [ -8.724357, 41.370957 ], [ -8.726283, 41.37552 ], [ -8.728281, 41.379147 ], [ -8.731143, 41.382252 ], [ -8.734932, 41.385114 ], [ -8.738847, 41.388111 ], [ -8.742213, 41.391369 ], [ -8.745471, 41.394717 ], [ -8.748999, 41.398785 ], [ -8.753706, 41.403366 ], [ -8.759556, 41.407353 ], [ -8.76033, 41.412627 ], [ -8.756415, 41.417658 ], [ -8.751537, 41.422401 ], [ -8.746488, 41.427072 ], [ -8.743644, 41.432373 ], [ -8.739972, 41.437773 ], [ -8.736813, 41.443344 ], [ -8.733213, 41.448672 ], [ -8.732241, 41.454324 ], [ -8.735634, 41.459778 ], [ -8.738577, 41.465142 ], [ -8.738874, 41.470263 ], [ -8.740845, 41.475843 ], [ -8.743635, 41.481432 ], [ -8.744616, 41.487075 ], [ -8.746083, 41.492439 ], [ -8.749395, 41.49738 ], [ -8.752446, 41.502537 ], [ -8.753364, 41.508477 ], [ -8.754066, 41.51412 ], [ -8.752401, 41.519637 ], [ -8.750619, 41.525217 ], [ -8.751852, 41.530581 ], [ -8.753373, 41.534919 ], [ -8.754858, 41.539158 ], [ -8.757, 
41.543856 ], [ -8.757747, 41.548914 ], [ -8.757459, 41.553432 ], [ -8.757135, 41.557446 ], [ -8.759295, 41.561028 ], [ -8.763336, 41.56452 ], [ -8.76681, 41.568867 ], [ -8.765334, 41.573727 ], [ -8.76321, 41.578416 ], [ -8.764074, 41.583366 ], [ -8.766657, 41.588208 ], [ -8.769645, 41.593113 ], [ -8.772669, 41.597856 ], [ -8.775693, 41.60277 ], [ -8.775288, 41.608269 ], [ -8.772255, 41.612067 ], [ -8.768619, 41.616387 ], [ -8.763831, 41.620086 ], [ -8.761446, 41.624586 ], [ -8.760528, 41.629491 ], [ -8.761491, 41.634126 ], [ -8.763714, 41.63877 ], [ -8.766819, 41.64309 ], [ -8.769609, 41.647545 ], [ -8.771346, 41.652279 ], [ -8.773632, 41.656896 ], [ -8.776854, 41.660928 ], [ -8.777979, 41.665275 ], [ -8.774937, 41.669307 ], [ -8.771211, 41.673006 ], [ -8.771562, 41.677461 ], [ -8.774352, 41.682051 ], [ -8.777448, 41.686227 ], [ -8.781552, 41.689764 ], [ -8.785359, 41.693004 ], [ -8.789166, 41.696244 ], [ -8.792946, 41.699475 ], [ -8.796222, 41.702301 ], [ -8.797311, 41.704848 ], [ -8.798706, 41.706027 ], [ -8.801073, 41.704776 ], [ -8.801379, 41.706711 ], [ -8.802909, 41.706882 ], [ -8.803449, 41.707449 ] ]
}
}
,
{
"type" : "Feature",
"properties" : {
"TRIP_ID" : 1375450445620000195,
"CALL_TYPE" : "C",
"ORIGIN_CALL" : "",
"ORIGIN_STAND" : "",
"TAXI_ID" : 20000195,
"TIMESTAMP" : "java.util.GregorianCalendar[time=1375450445,areFieldsSet=true,areAllFieldsSet=true,lenient=true,zone=java.util.SimpleTimeZone[id=UTC,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=1970,MONTH=0,WEEK_OF_YEAR=3,WEEK_OF_MONTH=3,DAY_OF_MONTH=16,DAY_OF_YEAR=16,DAY_OF_WEEK=6,DAY_OF_WEEK_IN_MONTH=3,AM_PM=1,HOUR=10,HOUR_OF_DAY=22,MINUTE=4,SECOND=10,MILLISECOND=445,ZONE_OFFSET=0,DST_OFFSET=0]",
"DAY_TYPE" : "A",
"MISSING_DATA" : false
},
"geometry" : {
"type" : "MultiPoint",
"coordinates" : [ [ -8.628732, 41.169834 ], [ -8.628732, 41.169843 ], [ -8.628732, 41.169834 ], [ -8.628381, 41.169771 ], [ -8.626923, 41.169924 ], [ -8.62569, 41.169411 ], [ -8.624565, 41.169312 ], [ -8.623782, 41.169213 ], [ -8.623791, 41.169069 ], [ -8.623125, 41.169141 ], [ -8.621946, 41.169663 ], [ -8.619561, 41.171166 ], [ -8.616519, 41.171778 ], [ -8.614818, 41.171823 ], [ -8.613504, 41.17185 ], [ -8.613207, 41.171895 ], [ -8.612856, 41.17158 ], [ -8.612874, 41.171238 ], [ -8.611524, 41.17104 ], [ -8.608725, 41.169492 ], [ -8.607375, 41.169258 ], [ -8.606799, 41.168475 ], [ -8.606628, 41.168025 ], [ -8.606628, 41.166576 ], [ -8.607582, 41.166351 ], [ -8.607573, 41.165712 ] ]
}
}]}
{"type": "Feature", "properties": {"TRIP_ID": 1373017604620000351, "CALL_TYPE": "C", "ORIGIN_CALL": "", "ORIGIN_STAND": "", "TAXI_ID": 20000351, "TIMESTAMP": "java.util.GregorianCalendar[time=1373017604,areFieldsSet=true,areAllFieldsSet=true,lenient=true,zone=java.util.SimpleTimeZone[id=UTC,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=1970,MONTH=0,WEEK_OF_YEAR=3,WEEK_OF_MONTH=3,DAY_OF_MONTH=16,DAY_OF_YEAR=16,DAY_OF_WEEK=6,DAY_OF_WEEK_IN_MONTH=3,AM_PM=1,HOUR=9,HOUR_OF_DAY=21,MINUTE=23,SECOND=37,MILLISECOND=604,ZONE_OFFSET=0,DST_OFFSET=0]", "DAY_TYPE": "A", "MISSING_DATA": false}, "geometry": {"type": "MultiPoint", "coordinates": [[-8.669088, 41.235228], [-8.669142, 41.236434], [-8.668863, 41.238369], [-8.667819, 41.236128], [-8.666748, 41.232897], [-8.668368, 41.228892], [-8.671149, 41.226687], [-8.675262, 41.228415], [-8.679879, 41.230557], [-8.685954, 41.232042], [-8.692443, 41.232438], [-8.695629, 41.234238], [-8.695836, 41.238441], [-8.694333, 41.243013], [-8.695107, 41.247549], [-8.69733, 41.250672], [-8.700489, 41.254596], [-8.701713, 41.259483], [-8.701533, 41.264667], [-8.7021, 41.270004], [-8.700444, 41.275503], [-8.698941, 41.280867], [-8.701434, 41.285718], [-8.704773, 41.290461], [-8.706357, 41.295483], [-8.702523, 41.299884], [-8.697591, 41.303547], [-8.697393, 41.308434], [-8.700894, 41.313186], [-8.704386, 41.31792], [-8.7075, 41.322843], [-8.708661, 41.328252], [-8.711145, 41.333598], [-8.712432, 41.339115], [-8.712666, 41.34357], [-8.712027, 41.346648], [-8.712117, 41.350698], [-8.713062, 41.356035], [-8.717904, 41.360796], [-8.722737, 41.365404], [-8.724357, 41.370957], [-8.726283, 41.37552], [-8.728281, 41.379147], [-8.731143, 41.382252], [-8.734932, 41.385114], [-8.738847, 41.388111], [-8.742213, 41.391369], [-8.745471, 41.394717], 
[-8.748999, 41.398785], [-8.753706, 41.403366], [-8.759556, 41.407353], [-8.76033, 41.412627], [-8.756415, 41.417658], [-8.751537, 41.422401], [-8.746488, 41.427072], [-8.743644, 41.432373], [-8.739972, 41.437773], [-8.736813, 41.443344], [-8.733213, 41.448672], [-8.732241, 41.454324], [-8.735634, 41.459778], [-8.738577, 41.465142], [-8.738874, 41.470263], [-8.740845, 41.475843], [-8.743635, 41.481432], [-8.744616, 41.487075], [-8.746083, 41.492439], [-8.749395, 41.49738], [-8.752446, 41.502537], [-8.753364, 41.508477], [-8.754066, 41.51412], [-8.752401, 41.519637], [-8.750619, 41.525217], [-8.751852, 41.530581], [-8.753373, 41.534919], [-8.754858, 41.539158], [-8.757, 41.543856], [-8.757747, 41.548914], [-8.757459, 41.553432], [-8.757135, 41.557446], [-8.759295, 41.561028], [-8.763336, 41.56452], [-8.76681, 41.568867], [-8.765334, 41.573727], [-8.76321, 41.578416], [-8.764074, 41.583366], [-8.766657, 41.588208], [-8.769645, 41.593113], [-8.772669, 41.597856], [-8.775693, 41.60277], [-8.775288, 41.608269], [-8.772255, 41.612067], [-8.768619, 41.616387], [-8.763831, 41.620086], [-8.761446, 41.624586], [-8.760528, 41.629491], [-8.761491, 41.634126], [-8.763714, 41.63877], [-8.766819, 41.64309], [-8.769609, 41.647545], [-8.771346, 41.652279], [-8.773632, 41.656896], [-8.776854, 41.660928], [-8.777979, 41.665275], [-8.774937, 41.669307], [-8.771211, 41.673006], [-8.771562, 41.677461], [-8.774352, 41.682051], [-8.777448, 41.686227], [-8.781552, 41.689764], [-8.785359, 41.693004], [-8.789166, 41.696244], [-8.792946, 41.699475], [-8.796222, 41.702301], [-8.797311, 41.704848], [-8.798706, 41.706027], [-8.801073, 41.704776], [-8.801379, 41.706711], [-8.802909, 41.706882], [-8.803449, 41.707449]]}}
{"type": "Feature", "properties": {"TRIP_ID": 1375450445620000195, "CALL_TYPE": "C", "ORIGIN_CALL": "", "ORIGIN_STAND": "", "TAXI_ID": 20000195, "TIMESTAMP": "java.util.GregorianCalendar[time=1375450445,areFieldsSet=true,areAllFieldsSet=true,lenient=true,zone=java.util.SimpleTimeZone[id=UTC,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=1970,MONTH=0,WEEK_OF_YEAR=3,WEEK_OF_MONTH=3,DAY_OF_MONTH=16,DAY_OF_YEAR=16,DAY_OF_WEEK=6,DAY_OF_WEEK_IN_MONTH=3,AM_PM=1,HOUR=10,HOUR_OF_DAY=22,MINUTE=4,SECOND=10,MILLISECOND=445,ZONE_OFFSET=0,DST_OFFSET=0]", "DAY_TYPE": "A", "MISSING_DATA": false}, "geometry": {"type": "MultiPoint", "coordinates": [[-8.628732, 41.169834], [-8.628732, 41.169843], [-8.628732, 41.169834], [-8.628381, 41.169771], [-8.626923, 41.169924], [-8.62569, 41.169411], [-8.624565, 41.169312], [-8.623782, 41.169213], [-8.623791, 41.169069], [-8.623125, 41.169141], [-8.621946, 41.169663], [-8.619561, 41.171166], [-8.616519, 41.171778], [-8.614818, 41.171823], [-8.613504, 41.17185], [-8.613207, 41.171895], [-8.612856, 41.17158], [-8.612874, 41.171238], [-8.611524, 41.17104], [-8.608725, 41.169492], [-8.607375, 41.169258], [-8.606799, 41.168475], [-8.606628, 41.168025], [-8.606628, 41.166576], [-8.607582, 41.166351], [-8.607573, 41.165712]]}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment