Skip to content

Instantly share code, notes, and snippets.

@pcace
Forked from philippkraft/README.md
Last active November 20, 2023 09:54
Show Gist options
  • Save pcace/aef0d9437ca05ef656a9d3e45e67cca0 to your computer and use it in GitHub Desktop.
Save pcace/aef0d9437ca05ef656a9d3e45e67cca0 to your computer and use it in GitHub Desktop.
Convert xyz elevation models to GeoTiff

Convert .xyz elevation models to GeoTiff

In Germany, more and more state agencies allow free access to high resolution elevation models. However, these are often released as xyz tables, which are not easily used in GIS environments. A standard method to convert this format to raster formats (eg. GeoTiff) is the GDAL function gdal_translate [1, 2]. However, converting 1M lines takes dozens of seconds and is not trivial to parallize.

This gist, using highly optimized Python library Pandas does the conversion on the same computer in less then 1 second. The script parallizes the conversion of multiple files and translates 100 files á 1M lines in 17 seconds on 16 core machine and a fast SSD. gdal_translate took 40 min.

Prerequisites:

$ pip install numpy pandas rasterio

Usage:

To do the conversion:

python3 xyz2tif.py --crs 'EPSG_25832' *.xyz

To merge the resulting files into one raster

python3 xyz2tif.py merge *.tif --output merged.tif

Licence (MIT)

Copyright 2022, Philipp Kraft

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""
Convert .xyz elevation models to GeoTiff
========================================
This gist, using highly optimized Python library Pandas does the conversion on the same computer in less then 1 second.
The script parallizes the conversion of multiple files and translates 150 files á 1M lines in 17 seconds on
16 core machine and a fast SSD. gdal_translate took 50 min.
Prerequisites:
--------------
$ pip install numpy pandas rasterio
Usage:
------
$ python3 xyz2tif.py --crs 'EPSG:25832' *.xyz
To merge the resulting files into one raster
$ python3 xyz2tif.py merge *.tif
"""
import sys
import os
import multiprocessing
import time
import pandas as pd
import numpy as np
import rasterio as rio
from rasterio import transform as riotrans
import argparse
def load_xyz(fn: str) -> pd.DataFrame:
"""
Loads a xyz data table from disk
Parameters
----------
fn : str
File name
"""
return pd.read_table(fn, names='x y z'.split(), sep=r'\s+', index_col=False)
def check_coords(xyz: pd.DataFrame):
x_count = len(xyz['x'].unique())
y_count = len(xyz['y'].unique())
z_count = len(xyz['z'].unique())
print(f'Distinct values: {x_count} x, {y_count} y and {z_count} z values')
def xyz2matrix(xyz: pd.DataFrame) -> (np.ndarray, float, float, float, float):
"""
Converts the xyz dataframe to a 2d numpy array with origin and bounding box
"""
mat = xyz.pivot_table(index='y', columns='x', values='z')
mat.sort_index(axis='index', ascending=False, inplace=True)
mat.sort_index(axis='columns', ascending=True, inplace=True)
# Get origin (upper left corner)
cellsize_x = mat.columns[1] - mat.columns[0]
cellsize_y = mat.index[1] - mat.index[0]
south = mat.index.min() - cellsize_y / 2
north = mat.index.max() + cellsize_y / 2
west = mat.columns.min() - cellsize_x / 2
east = mat.columns.max() + cellsize_x / 2
arr = np.asarray(mat, dtype=np.float32)
return arr, west, south, east, north
def merge(outfile, *in_files):
"""
Merges in_files (*.tif) to outfile
"""
from rasterio.merge import merge as rmerge
t0 = time.time()
rasters = [rio.open(fn) for fn in in_files]
t1 = time.time() -t0
print(f'{t1:0.1f}s : Merging {len(rasters)} rasters')
arr, transform = rmerge(rasters)
t2 = time.time() - t0
print(f'{t2:0.1f}s : Save {arr.shape[1]} x {arr.shape[2]} raster to {outfile}')
with rio.open(
outfile, 'w',
driver='GTiff',
height= arr.shape[1], width = arr.shape[2],
count=arr.shape[0], dtype=str(arr.dtype),
crs=CRS, transform=transform,
compress='lzw', num_threads=os.cpu_count()
) as raster:
raster.write(arr)
t3 = time.time() - t0
print(f'{t3:0.1f}s : Done')
def matrix2raster(fn_out:str, arr: np.ndarray, west: float, south: float, east: float, north: float):
transform = riotrans.from_bounds(
west=west, south=south,
east=east, north=north,
width=arr.shape[1],
height=arr.shape[0]
)
with rio.open(
fn_out, 'w',
driver='GTiff',
height= arr.shape[0], width = arr.shape[1],
count=1, dtype=str(arr.dtype),
crs=CRS, transform=transform, compress='lzw'
) as raster:
raster.write(arr, 1)
def process(fn_in, fn_out=None):
xyz = load_xyz(fn_in)
arr, west, south, east, north = xyz2matrix(xyz)
fn_out = fn_out or fn_in.rsplit('.', 1)[0] + '.tif'
matrix2raster(fn_out, arr, west, south, east, north)
return fn_out
def multi_process(files, *, nproc=None):
nproc = nproc or os.cpu_count()
with multiprocessing.Pool(nproc) as pool:
total = len(files)
print(f'Running {total} files on {nproc} processes')
t0 = time.time()
for i, fn_out in enumerate(pool.imap_unordered(process, files)):
et = time.time() - t0
tt = et * total / (i + 1)
print(f'{i}/{total} : {fn_out} {et:0.1f}s/{tt:0.1f}s')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Convert .xyz elevation models to GeoTiff')
parser.add_argument('files', nargs='+', help='Input files')
parser.add_argument('--crs', default='EPSG:25832', help='Coordinate reference system')
parser.add_argument('--merge', action='store_true', help='Merge the resulting files into one raster')
parser.add_argument('--output', default='all_merge.tif', help='Output filename for merged file')
args = parser.parse_args()
if args.merge:
merge(args.output, *args.files, crs=args.crs)
else:
multi_process(args.files, crs=args.crs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment