Skip to content

Instantly share code, notes, and snippets.

@makmanalp
Created September 12, 2018 19:39
Show Gist options
  • Save makmanalp/0bba6cf6ff6cba1fe54d1c1fc5ea2a09 to your computer and use it in GitHub Desktop.
Save makmanalp/0bba6cf6ff6cba1fe54d1c1fc5ea2a09 to your computer and use it in GitHub Desktop.
Read / write d3-style network JSON files with pandas, preserving order and types
import pandas as pd
import json
def read_network(file_name, nodes_field="nodes", edges_field="edges"):
    """Load a d3-style network JSON file into pandas structures.

    Args:
        file_name: Path of the JSON file to read.
        nodes_field: Top-level key holding the list of node records.
        edges_field: Top-level key holding the list of edge records.

    Returns:
        A ``(nodes, edges, other_fields)`` tuple: one DataFrame built from
        the node records, one built from the edge records, and a dict with
        every other top-level key of the file.

    Raises:
        KeyError: If ``nodes_field`` or ``edges_field`` is not a top-level
            key of the file.
    """
    # JSON text is UTF-8; do not rely on the locale's default encoding,
    # which differs between platforms.
    with open(file_name, "r", encoding="utf-8") as f:
        network = json.load(f)

    nodes = pd.DataFrame.from_records(network[nodes_field])
    edges = pd.DataFrame.from_records(network[edges_field])

    reserved = (nodes_field, edges_field)
    other_fields = {
        key: value for key, value in network.items() if key not in reserved
    }
    return nodes, edges, other_fields
def to_records(df):
    """Convert a DataFrame into a list of plain per-row dicts.

    Stands in for ``df.to_dict(orient="records")``, which has issues with
    upcasting ints to floats when other floats are present, see
    https://github.com/pandas-dev/pandas/issues/12859. The frame is written
    with pandas' JSON serialiser and the resulting text is parsed back.
    """
    serialized = df.to_json(orient="records")
    return json.loads(serialized)
def write_network(file_name, nodes, edges, other_fields=None, nodes_field="nodes", edges_field="edges"):
    """Write node and edge DataFrames to a d3-style network JSON file.

    Args:
        file_name: Path of the JSON file to create or overwrite.
        nodes: DataFrame whose rows become the node records.
        edges: DataFrame whose rows become the edge records.
        other_fields: Optional dict of extra top-level keys. It is applied
            after the node and edge records, so a key equal to
            ``nodes_field`` or ``edges_field`` replaces those records.
        nodes_field: Top-level key under which node records are stored.
        edges_field: Top-level key under which edge records are stored.
    """
    network = {
        nodes_field: to_records(nodes),
        edges_field: to_records(edges),
    }
    if other_fields is not None:
        network.update(other_fields)

    # Serialise before opening the target: opening with "w" truncates the
    # file, so a conversion error must not be able to destroy existing data.
    serialized = json.dumps(network, indent=4, separators=(',', ': '))

    with open(file_name, "w", encoding="utf-8") as f:
        f.write(serialized)
def _merge_new_ids(frame, mapping, field):
    """Left-join ``mapping`` onto ``frame[field]``; new ids land in ``field``."""
    # Naming the mapping's only column after ``field`` makes the merge see a
    # name clash: the existing column is suffixed "_old" and the joined
    # values keep the plain name.
    mapping.columns = [field]
    return frame.merge(mapping,
                       left_on=field, right_index=True,
                       how="left", suffixes=("_old", ""))


def _drop_old_columns(frame):
    """Return ``frame`` without the columns whose name ends in ``_old``."""
    stale = [x for x in frame.columns if x.endswith("_old")]
    return frame.drop(stale, axis=1)


def remap_network_ids(nodes, edges, conversion_mapping, id_field="id", source_field="source", target_field="target"):
    """Replace node ids, and the edge endpoints that refer to them, with new ids.

    Args:
        nodes: DataFrame of nodes with an ``id_field`` column.
        edges: DataFrame of edges with ``source_field`` and ``target_field``
            columns holding node ids.
        conversion_mapping: Series, or single-column DataFrame, whose index
            holds the old ids and whose values are the new ones. It is not
            modified.
        id_field: Name of the id column in ``nodes``.
        source_field: Name of the source-id column in ``edges``.
        target_field: Name of the target-id column in ``edges``.

    Returns:
        A ``(nodes, edges)`` tuple of new DataFrames with remapped ids.

    Notes:
        The lookup is a left join, so ids absent from the mapping's index
        get a missing value rather than raising. Any column whose name ends
        in ``_old`` is dropped from the results. The remapped columns are
        produced by the merge, so they are expected to end up after the
        untouched columns (NOTE(review): confirm on the pandas version in
        use).
    """
    # Work on a private single-column frame so the caller's object is never
    # renamed; a Series (as the contract describes) is promoted to a frame.
    if isinstance(conversion_mapping, pd.Series):
        mapping = conversion_mapping.to_frame()
    else:
        mapping = conversion_mapping.copy()

    nodes = _merge_new_ids(nodes, mapping, id_field)
    nodes = _drop_old_columns(nodes)

    # Both endpoints are merged before the "_old" columns are dropped.
    edges = _merge_new_ids(edges, mapping, source_field)
    edges = _merge_new_ids(edges, mapping, target_field)
    edges = _drop_old_columns(edges)
    return nodes, edges
def find_neighbors(nodes, edges, this_node, extra_fields=None):
    """Return the neighbours of ``this_node`` as a list of records.

    Args:
        nodes: Not used by this function; kept so existing callers keep
            working.
        edges: DataFrame with ``source`` and ``target`` columns of node ids.
        this_node: Id of the node whose neighbours are wanted.
        extra_fields: Optional iterable of edge column names to copy into
            each record next to the neighbour id.

    Returns:
        One dict per edge touching ``this_node``. Key ``"id"`` holds the id
        at the other end of the edge as an integer, followed by the
        requested ``extra_fields``. The list is empty when no edge touches
        the node.
    """
    # A None default avoids sharing one mutable list between calls; list()
    # also lets callers pass a tuple.
    extra_fields = [] if extra_fields is None else list(extra_fields)

    touches_node = (edges.source == this_node) | (edges.target == this_node)
    connected_edges = edges[touches_node].copy()

    # Pick the opposite endpoint column-wise. Unlike a row-wise apply, this
    # also works on an empty selection and does not route ids through
    # mixed-dtype rows.
    is_outgoing = connected_edges.source == this_node
    neighbor_ids = connected_edges.target.where(is_outgoing, connected_edges.source)
    connected_edges["id"] = neighbor_ids.astype("int64")

    connected_edges = connected_edges[["id"] + extra_fields]
    return to_records(connected_edges)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment