"""Remove pseudonodes from a GeoDataFrame full of linestrings."""
from shapely.ops import linemerge
import geopandas as gpd
import networkx as nx
def remove_pseudonodes(gdf, geom_col="geometry"):
    """Merge chains of linestrings that are joined only by pseudonodes.

    A pseudonode is an endpoint shared by exactly two linestring ends, i.e. a
    place where one line was split for no topological reason. Endpoints shared
    by one end (tips) or by three or more ends (forks) are kept as real nodes.
    Every run of linestrings between two such real nodes is merged into a
    single geometry with ``shapely.ops.linemerge``.

    Endpoints are matched by exact coordinate equality. Rings that contain no
    tip or fork at all (every node is a pseudonode) have no natural start
    point and are returned unmerged.

    Args:
        gdf: GeoDataFrame whose ``geom_col`` column holds single-part
            linestrings. It is not modified. Rows with a missing or empty
            geometry are skipped because they have no endpoints.
        geom_col: Name of the geometry column to read, also used as the name
            of the geometry column in the result.

    Returns:
        A new GeoDataFrame with the columns ``source`` and ``target`` (integer
        ids of the endpoint nodes) and ``geom_col``. Other attribute columns
        and the CRS of ``gdf`` are not carried over.
    """

    def _gdf_to_graph(gdf, geom_col) -> nx.MultiGraph:
        """Build a multigraph with one node per distinct endpoint and one edge per linestring."""
        node_ids = {}
        # A multigraph keeps parallel linestrings that share both endpoints;
        # a simple graph would silently overwrite one with the other.
        graph = nx.MultiGraph()
        for geom in gdf[geom_col]:
            if geom is None or geom.is_empty:
                continue
            coords = geom.coords
            # Number the endpoints in order of first appearance.
            source = node_ids.setdefault(coords[0], len(node_ids))
            target = node_ids.setdefault(coords[-1], len(node_ids))
            graph.add_edge(source, target, geom=geom)
        return graph

    def _edge_id(u, v, key):
        """Return an identifier for an undirected edge that ignores endpoint order."""
        return (u, v, key) if u <= v else (v, u, key)

    graph = _gdf_to_graph(gdf, geom_col)

    # Tips (degree 1) and forks (degree > 2) are the real nodes; every node
    # taken from a linestring end has degree >= 1, so "!= 2" selects both.
    anchors = [node for node in graph if graph.degree(node) != 2]
    anchor_set = set(anchors)

    records = []
    visited = set()
    for start in anchors:
        for _, neighbor, key, data in graph.edges(start, keys=True, data=True):
            first_edge = _edge_id(start, neighbor, key)
            if first_edge in visited:
                # This chain was already walked from its other end.
                continue
            visited.add(first_edge)
            parts = [data["geom"]]
            current = neighbor

            # Follow the chain through pseudonodes. Each one has exactly two
            # incident edges: the one just used and the one leading onwards,
            # so the walk must end at a tip or fork (possibly `start` itself).
            while current not in anchor_set:
                onward, onward_key, onward_data = next(
                    (nbr, k, d)
                    for _, nbr, k, d in graph.edges(current, keys=True, data=True)
                    if _edge_id(current, nbr, k) not in visited
                )
                visited.add(_edge_id(current, onward, onward_key))
                parts.append(onward_data["geom"])
                current = onward

            # A single linestring between two real nodes needs no merging.
            merged = parts[0] if len(parts) == 1 else linemerge(parts)
            records.append({"source": start, "target": current, geom_col: merged})

    # Edges never reached from a tip or fork belong to isolated rings made up
    # solely of pseudonodes; keep them exactly as they came in.
    for u, v, key, data in graph.edges(keys=True, data=True):
        if _edge_id(u, v, key) not in visited:
            records.append({"source": u, "target": v, geom_col: data["geom"]})

    # Passing the columns explicitly keeps the result well-formed when empty.
    return gpd.GeoDataFrame(
        records,
        columns=["source", "target", geom_col],
        geometry=geom_col,
    )
if __name__ == "__main__":
    # Example usage: read a layer of linestrings, merge away the pseudonodes
    # and write the result to a GeoPackage. Replace the placeholders first.
    gdf = gpd.GeoDataFrame.from_file("<Path fo file containing linestrings with pseudonodes>")
    clean_gdf = remove_pseudonodes(gdf)
    # The cleaned frame is rebuilt from graph edges, so assign its CRS
    # explicitly before writing it out.
    clean_gdf.crs = {"init": "epsg:<Desired EPSG code>"}
    clean_gdf.to_file("<Path to file to contain linestrings with removed pseudonodes>", driver="GPKG")
