Skip to content

Instantly share code, notes, and snippets.

@ajzeigert
Last active April 12, 2023 19:53
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajzeigert/7cdd9a8301f540764a001bb4a380e4b3 to your computer and use it in GitHub Desktop.
Save ajzeigert/7cdd9a8301f540764a001bb4a380e4b3 to your computer and use it in GitHub Desktop.
Convert KML/KMZ with table data in description field to Shapefile with data in actual fields
# Convert KML/KMZ to Shapefile
# This script converts an input KML/KMZ file to an ESRI Shapefile
# and additionally parses any HTML table found in the "description" field
# and converts the data to actual fields
# Dependencies must be installed first: gdal, fiona, lxml
# Example usage: python KMLtoSHP.py inFile.kmz outFile.shp
import os
import sys
import json
import ogr
import copy
import fiona
from lxml import etree
# Every output field is written as text, and Shapefile is the only target format.
OUTPUT_DRIVER = 'ESRI Shapefile'
USAGE = "This script requires an input and output file, for example: python KMLtoSHP.py inFile.kml outfile.shp"

# `str` plus Python 2's `unicode` (json.loads returns unicode there).
TEXT_TYPES = (str, type(u''))


def read_features(path):
    """Read the first layer of *path* with OGR.

    Returns a ``(features, srs_wkt)`` tuple: ``features`` is a list of
    GeoJSON-like dicts (one per OGR feature, via ``ExportToJson``) and
    ``srs_wkt`` is the layer's spatial reference as WKT, or ``None`` when the
    layer reports no spatial reference.

    Raises:
        IOError: if OGR cannot open the file or it contains no layer.
    """
    reader = ogr.Open(path)
    if reader is None:
        # ogr.Open signals failure by returning None rather than raising.
        raise IOError("Could not open " + path)
    layer = reader.GetLayer(0)
    if layer is None:
        raise IOError("No layer found in " + path)
    features = [json.loads(feature.ExportToJson()) for feature in layer]
    srs = layer.GetSpatialRef()
    srs_wkt = srs.ExportToWkt() if srs is not None else None
    return features, srs_wkt


def expand_description(properties):
    """Turn an HTML table stored in ``properties['description']`` into fields.

    Only the first ``/html/body/table`` is read. Each row with at least two
    cells contributes ``first cell text -> second cell text``. Rows whose
    first cell has no text are skipped because they cannot name a field.
    The ``description`` entry is removed only when at least one row was
    converted; otherwise *properties* is left untouched.

    Returns True when at least one field was extracted, False otherwise.
    """
    description = properties.get('description')
    if not description:
        return False
    try:
        document = etree.HTML(description)
    except (etree.LxmlError, ValueError):
        # Unparsable markup: keep the description as a plain text field.
        return False
    if document is None:
        return False
    tables = document.xpath('/html/body/table')
    if not tables:
        return False

    converted = False
    # Accept rows that are direct children as well as rows wrapped in <tbody>.
    for row in tables[0].xpath('./tr | ./tbody/tr'):
        cells = list(row)
        if len(cells) < 2:
            continue
        key = cells[0].text
        if key is None or not key.strip():
            continue
        properties[key] = cells[1].text
        converted = True
    if converted:
        properties.pop('description', None)
    return converted


def collect_property_names(features):
    """Return every property name used by any feature, in first-seen order."""
    names = []
    seen = set()
    for feature in features:
        for name in (feature.get('properties') or {}):
            if name not in seen:
                seen.add(name)
                names.append(name)
    return names


def infer_geometry_type(features, default='Point'):
    """Pick the schema geometry type from the features' own geometries.

    Returns *default* when no feature has a geometry, the single type when
    all geometries agree, and the single-part name (e.g. ``'Polygon'``) when
    the features mix a type with its ``Multi`` variant. Point/MultiPoint
    mixes are not merged, since a Shapefile stores those as different shape
    types.

    Raises:
        ValueError: if the geometry types cannot share one Shapefile.
    """
    types = set()
    for feature in features:
        geometry = feature.get('geometry')
        if geometry and geometry.get('type'):
            types.add(geometry['type'])
    if not types:
        return default
    if len(types) == 1:
        return next(iter(types))
    bases = set(
        name[len('Multi'):] if name.startswith('Multi') else name
        for name in types
    )
    if len(bases) == 1 and 'Point' not in bases:
        return bases.pop()
    raise ValueError(
        "Cannot write mixed geometry types ("
        + ", ".join(sorted(types))
        + ") to a single Shapefile"
    )


def build_schema(features):
    """Build the fiona schema: inferred geometry type, all properties as 'str'."""
    return {
        'geometry': infer_geometry_type(features),
        'properties': [(name, 'str') for name in collect_property_names(features)],
    }


def as_text(value):
    """Return *value* unchanged if it is None or text, else its ``str()`` form."""
    if value is None or isinstance(value, TEXT_TYPES):
        return value
    return str(value)


def conform_feature(feature, property_names):
    """Return a copy of *feature* whose properties match *property_names*.

    Missing properties are filled with None and non-text values are converted
    to text, so every record agrees with the all-'str' schema.
    """
    source = feature.get('properties') or {}
    conformed = dict(feature)
    conformed['properties'] = dict(
        (name, as_text(source.get(name))) for name in property_names
    )
    return conformed


def main(argv):
    """Convert the KML/KMZ named by ``argv[1]`` to the Shapefile ``argv[2]``.

    Returns a process exit status: 0 on success, 1 on a usage or input error.
    """
    if len(argv) < 3:
        print(USAGE)
        return 1
    inFile = argv[1]
    outFile = argv[2]

    try:
        features, output_srs = read_features(inFile)
    except IOError as err:
        print(str(err))
        return 1
    if not features:
        print("No features found in " + inFile + ", nothing to convert")
        return 1

    # Process description field: try to parse any table it holds and add the
    # rows to the feature's properties.
    for feature in features:
        if feature.get('properties') is None:
            feature['properties'] = {}
        if expand_description(feature['properties']):
            print("Found data in description table, converting")

    # Create an output schema for our features to conform to
    try:
        output_schema = build_schema(features)
    except ValueError as err:
        print(str(err))
        return 1
    property_names = [name for name, _ in output_schema['properties']]

    # Open our new file
    with fiona.open(
        outFile,
        'w',
        driver=OUTPUT_DRIVER,
        crs=output_srs,
        schema=output_schema
    ) as c:
        # Loop through our features and write to the new file
        for feature in features:
            print("Writing feature")
            c.write(conform_feature(feature, property_names))
    print(inFile + " converted to " + outFile)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment