Skip to content

Instantly share code, notes, and snippets.

@tomncooper
Created July 21, 2018 13:21
Show Gist options
  • Save tomncooper/f29158dba6953b5908fb118c2dc3e0e6 to your computer and use it in GitHub Desktop.
Save tomncooper/f29158dba6953b5908fb118c2dc3e0e6 to your computer and use it in GitHub Desktop.
script for comparing graph traversal path discovery to just inferring the paths by combining all instances of components into paths.
import csv
from gremlin_python.process.graph_traversal import out, has
def find_all_paths(gremlin_client, source_comp, sink_comp):
source_vertices = (gremlin_client.graph_traversal.V()
.hasLabel("instance")
.has("component", source_comp).toList())
sink_task_ids = (gremlin_client.graph_traversal.V()
.hasLabel("instance").has("component", sink_comp)
.values("task_id").toList())
all_paths = []
for i, source in enumerate(source_vertices):
print(f"Finding paths for source {i+1} of {len(source_vertices)}")
for j, sink_id in enumerate(sink_task_ids):
print(f"\tFinding paths to sink {j+1} of {len(sink_task_ids)}")
paths = (gremlin_client.graph_traversal
.V(source)
.repeat(out().simplePath())
.until(has("task_id", sink_id))
.path()
.toList())
all_paths.extend(paths)
return all_paths
def combinations(comp_instances, depth=0) :
""" Return all combinations of instances in the supplied list of instance
lists"""
for instance in comp_instances[0] :
if len(comp_instances) > 1 :
for result in combinations(comp_instances[1:], depth+1) :
yield [instance,] + result
else :
yield [instance,]
def create_paths(gremlin_client, comp_path):
# Move along each component path and get the instances for each component
# and create paths connecting each instance in that path.
instances = []
# Get all the instances for each component in the path
for component in comp_path:
comp_instances = (graph_client.graph_traversal
.V().hasLabel("instance")
.has("component", component)
.toList())
instances.append(comp_instances)
return combinations(instances)
def infer_paths(gremlin_client, source_comp, sink_comp):
# Find the component level path between the supplied source and sink
# Need to write a gremlin query to find all the source and sink components
# and then find all component level paths between them.
comp_paths = [["Sp", "A", "B", "C"]]
all_paths = []
# Create instance paths for all components in the component path
for comp_path in comp_paths:
all_paths.extend(create_paths(gremlin_client, comp_path))
return all_paths
def save_paths(paths, filename):
with open(filename, "w") as save_file:
writer = csv.writer(save_file)
writer.writerows(paths)
if __name__ == "__main__":
import datetime as dt
from tests.local.graph_connect import graph_client
# Assuming we have some way to find the source and sink components in the
# graph - should just be an easy query
source_comps = ["Sp"]
sink_comps = ["C"]
for source_comp in source_comps:
for sink_comp in sink_comps:
# Use the graph traversal method
start = dt.datetime.now()
graph_paths = find_all_paths(graph_client, source_comp, sink_comp)
stop = dt.datetime.now()
save_paths(graph_paths,
f"{source_comp}_{sink_comp}_graph_paths.csv")
elapsed = (stop-start).total_seconds()
print("Using graph traversal:")
print(f"Found {len(graph_paths)} paths from instances of component"
f" {source_comp} to instances of component {sink_comp}")
print(f"Elapsed time: {elapsed} seconds")
start2 = dt.datetime.now()
inferred_paths = infer_paths(graph_client, source_comp, sink_comp)
stop2 = dt.datetime.now()
save_paths(graph_paths,
f"{source_comp}_{sink_comp}_inferred_paths.csv")
elapsed2 = (stop2-start2).total_seconds()
print("By inferring paths:")
print(f"Found {len(inferred_paths)} paths from instances of "
f"component {source_comp} to instances of component "
f"{sink_comp}")
print(f"Elapsed time: {elapsed2} seconds")
print(f"Inferred is {round((elapsed / elapsed2),2)} times faster "
f"than the graph traversal")
graph_path_set = set(map(tuple, graph_paths))
inferred_paths_set = set(map(tuple, inferred_paths))
print("Differences in the paths found:")
print(graph_path_set.symmetric_difference(inferred_paths_set))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment