Skip to content

Instantly share code, notes, and snippets.

@ttomasz
Last active March 20, 2023 17:26
Show Gist options
  • Save ttomasz/661d5ec910744cb778c222a949413b6d to your computer and use it in GitHub Desktop.
Save ttomasz/661d5ec910744cb778c222a949413b6d to your computer and use it in GitHub Desktop.
Convert multiline (regular) GeoJSON to line-delimited
import sys
import datetime
import time
from pathlib import Path
# Third-party dependencies. Each import is guarded so that a missing package
# prints an actionable installation hint before the original error is re-raised.
try:
    import ijson
except ModuleNotFoundError:
    print("Module ijson not found. Run: `pip install ijson`")
    raise
try:
    import jsonlines
except ModuleNotFoundError:
    print("Module jsonlines not found. Run: `pip install jsonlines`")
    raise
def curent_timestamp() -> str:
    """Return the current local date and time as an ISO 8601 string.

    The date and time parts are separated by a single space instead of the
    default ``T`` (for example ``2023-03-20 17:26:00.123456``), which reads
    better as a prefix for progress messages.
    """
    return datetime.datetime.now().isoformat(sep=" ")
if __name__ == '__main__':
    # Convert a regular (single-document) GeoJSON FeatureCollection into a
    # line-delimited file with one feature per line. Features whose "geometry"
    # member is missing, null or empty are dropped and counted separately.
    #
    # Usage: python convert.py input/path.geojson output/path.geojsonl
    if len(sys.argv) != 3:
        print("Number of parameters unexpected. You should run the script like: "
              "python convert.py input/path.geojson output/path.geojsonl")
        # sys.exit instead of the bare exit() helper, which is only injected by
        # the site module and is not guaranteed to exist in every interpreter.
        sys.exit(1)

    input_path_object = Path(sys.argv[1]).absolute().resolve()
    input_path = input_path_object.as_posix()
    if not input_path_object.exists():
        raise FileNotFoundError(f"{sys.argv[1]} (resolved to: {input_path}) - not found.")
    output_path = Path(sys.argv[2]).absolute().as_posix()
    print(f"{input_path=}")  # = suffix shows variable name
    print(f"{output_path=}")

    num_rows = 0  # features read from the input
    num_rows_written = 0  # features actually written to the output
    num_rows_missing_geometry = 0  # features skipped because they had no geometry
    print(f"{ijson.backend=}")

    # The input is opened in binary mode: ijson works on bytes and decodes the
    # JSON itself, so the result does not depend on the locale's default encoding.
    with open(input_path, "rb") as f, jsonlines.open(output_path, mode="w") as writer:
        print(f"{curent_timestamp()} - starting processing...")
        time_start = time.perf_counter()
        # ijson streams the items of the top-level "features" array one at a
        # time, so the whole document never has to be held in memory.
        # use_float=True asks ijson for plain floats (see the ijson docs).
        features = ijson.items(f, "features.item", use_float=True)
        for num_rows, feature in enumerate(features, start=1):
            if feature.get("geometry"):
                writer.write(feature)
                num_rows_written += 1
            else:
                num_rows_missing_geometry += 1
            if num_rows % 100000 == 0:
                print(f"{curent_timestamp()} - rows processed so far: {num_rows:_}")
        time_end = time.perf_counter()

    # Reported after the files are closed so the output is fully flushed.
    duration = datetime.timedelta(seconds=time_end - time_start)
    print(f"{curent_timestamp()} - processing finished. Total rows written to new file: {num_rows_written:_}. "
          f"Time: {duration}. Number of dropped rows due to lack of geometry: {num_rows_missing_geometry:_}.")
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{"type": "Feature", "properties": {"TRIP_ID": 1373017604620000351, "CALL_TYPE": "C", "ORIGIN_CALL": "", "ORIGIN_STAND": "", "TAXI_ID": 20000351, "TIMESTAMP": "java.util.GregorianCalendar[time=1373017604,areFieldsSet=true,areAllFieldsSet=true,lenient=true,zone=java.util.SimpleTimeZone[id=UTC,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=1970,MONTH=0,WEEK_OF_YEAR=3,WEEK_OF_MONTH=3,DAY_OF_MONTH=16,DAY_OF_YEAR=16,DAY_OF_WEEK=6,DAY_OF_WEEK_IN_MONTH=3,AM_PM=1,HOUR=9,HOUR_OF_DAY=21,MINUTE=23,SECOND=37,MILLISECOND=604,ZONE_OFFSET=0,DST_OFFSET=0]", "DAY_TYPE": "A", "MISSING_DATA": false}, "geometry": {"type": "MultiPoint", "coordinates": [[-8.669088, 41.235228], [-8.669142, 41.236434], [-8.668863, 41.238369], [-8.667819, 41.236128], [-8.666748, 41.232897], [-8.668368, 41.228892], [-8.671149, 41.226687], [-8.675262, 41.228415], [-8.679879, 41.230557], [-8.685954, 41.232042], [-8.692443, 41.232438], [-8.695629, 41.234238], [-8.695836, 41.238441], [-8.694333, 41.243013], [-8.695107, 41.247549], [-8.69733, 41.250672], [-8.700489, 41.254596], [-8.701713, 41.259483], [-8.701533, 41.264667], [-8.7021, 41.270004], [-8.700444, 41.275503], [-8.698941, 41.280867], [-8.701434, 41.285718], [-8.704773, 41.290461], [-8.706357, 41.295483], [-8.702523, 41.299884], [-8.697591, 41.303547], [-8.697393, 41.308434], [-8.700894, 41.313186], [-8.704386, 41.31792], [-8.7075, 41.322843], [-8.708661, 41.328252], [-8.711145, 41.333598], [-8.712432, 41.339115], [-8.712666, 41.34357], [-8.712027, 41.346648], [-8.712117, 41.350698], [-8.713062, 41.356035], [-8.717904, 41.360796], [-8.722737, 41.365404], [-8.724357, 41.370957], [-8.726283, 41.37552], [-8.728281, 41.379147], [-8.731143, 41.382252], [-8.734932, 41.385114], [-8.738847, 41.388111], [-8.742213, 41.391369], [-8.745471, 41.394717], 
[-8.748999, 41.398785], [-8.753706, 41.403366], [-8.759556, 41.407353], [-8.76033, 41.412627], [-8.756415, 41.417658], [-8.751537, 41.422401], [-8.746488, 41.427072], [-8.743644, 41.432373], [-8.739972, 41.437773], [-8.736813, 41.443344], [-8.733213, 41.448672], [-8.732241, 41.454324], [-8.735634, 41.459778], [-8.738577, 41.465142], [-8.738874, 41.470263], [-8.740845, 41.475843], [-8.743635, 41.481432], [-8.744616, 41.487075], [-8.746083, 41.492439], [-8.749395, 41.49738], [-8.752446, 41.502537], [-8.753364, 41.508477], [-8.754066, 41.51412], [-8.752401, 41.519637], [-8.750619, 41.525217], [-8.751852, 41.530581], [-8.753373, 41.534919], [-8.754858, 41.539158], [-8.757, 41.543856], [-8.757747, 41.548914], [-8.757459, 41.553432], [-8.757135, 41.557446], [-8.759295, 41.561028], [-8.763336, 41.56452], [-8.76681, 41.568867], [-8.765334, 41.573727], [-8.76321, 41.578416], [-8.764074, 41.583366], [-8.766657, 41.588208], [-8.769645, 41.593113], [-8.772669, 41.597856], [-8.775693, 41.60277], [-8.775288, 41.608269], [-8.772255, 41.612067], [-8.768619, 41.616387], [-8.763831, 41.620086], [-8.761446, 41.624586], [-8.760528, 41.629491], [-8.761491, 41.634126], [-8.763714, 41.63877], [-8.766819, 41.64309], [-8.769609, 41.647545], [-8.771346, 41.652279], [-8.773632, 41.656896], [-8.776854, 41.660928], [-8.777979, 41.665275], [-8.774937, 41.669307], [-8.771211, 41.673006], [-8.771562, 41.677461], [-8.774352, 41.682051], [-8.777448, 41.686227], [-8.781552, 41.689764], [-8.785359, 41.693004], [-8.789166, 41.696244], [-8.792946, 41.699475], [-8.796222, 41.702301], [-8.797311, 41.704848], [-8.798706, 41.706027], [-8.801073, 41.704776], [-8.801379, 41.706711], [-8.802909, 41.706882], [-8.803449, 41.707449]]}}
{"type": "Feature", "properties": {"TRIP_ID": 1375450445620000195, "CALL_TYPE": "C", "ORIGIN_CALL": "", "ORIGIN_STAND": "", "TAXI_ID": 20000195, "TIMESTAMP": "java.util.GregorianCalendar[time=1375450445,areFieldsSet=true,areAllFieldsSet=true,lenient=true,zone=java.util.SimpleTimeZone[id=UTC,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=1970,MONTH=0,WEEK_OF_YEAR=3,WEEK_OF_MONTH=3,DAY_OF_MONTH=16,DAY_OF_YEAR=16,DAY_OF_WEEK=6,DAY_OF_WEEK_IN_MONTH=3,AM_PM=1,HOUR=10,HOUR_OF_DAY=22,MINUTE=4,SECOND=10,MILLISECOND=445,ZONE_OFFSET=0,DST_OFFSET=0]", "DAY_TYPE": "A", "MISSING_DATA": false}, "geometry": {"type": "MultiPoint", "coordinates": [[-8.628732, 41.169834], [-8.628732, 41.169843], [-8.628732, 41.169834], [-8.628381, 41.169771], [-8.626923, 41.169924], [-8.62569, 41.169411], [-8.624565, 41.169312], [-8.623782, 41.169213], [-8.623791, 41.169069], [-8.623125, 41.169141], [-8.621946, 41.169663], [-8.619561, 41.171166], [-8.616519, 41.171778], [-8.614818, 41.171823], [-8.613504, 41.17185], [-8.613207, 41.171895], [-8.612856, 41.17158], [-8.612874, 41.171238], [-8.611524, 41.17104], [-8.608725, 41.169492], [-8.607375, 41.169258], [-8.606799, 41.168475], [-8.606628, 41.168025], [-8.606628, 41.166576], [-8.607582, 41.166351], [-8.607573, 41.165712]]}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment