Created
July 21, 2018 13:21
-
-
Save tomncooper/f29158dba6953b5908fb118c2dc3e0e6 to your computer and use it in GitHub Desktop.
script for comparing graph traversal path discovery to just inferring the paths by combining all instances of components into paths.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
from gremlin_python.process.graph_traversal import out, has | |
def find_all_paths(gremlin_client, source_comp, sink_comp): | |
source_vertices = (gremlin_client.graph_traversal.V() | |
.hasLabel("instance") | |
.has("component", source_comp).toList()) | |
sink_task_ids = (gremlin_client.graph_traversal.V() | |
.hasLabel("instance").has("component", sink_comp) | |
.values("task_id").toList()) | |
all_paths = [] | |
for i, source in enumerate(source_vertices): | |
print(f"Finding paths for source {i+1} of {len(source_vertices)}") | |
for j, sink_id in enumerate(sink_task_ids): | |
print(f"\tFinding paths to sink {j+1} of {len(sink_task_ids)}") | |
paths = (gremlin_client.graph_traversal | |
.V(source) | |
.repeat(out().simplePath()) | |
.until(has("task_id", sink_id)) | |
.path() | |
.toList()) | |
all_paths.extend(paths) | |
return all_paths | |
def combinations(comp_instances, depth=0) : | |
""" Return all combinations of instances in the supplied list of instance | |
lists""" | |
for instance in comp_instances[0] : | |
if len(comp_instances) > 1 : | |
for result in combinations(comp_instances[1:], depth+1) : | |
yield [instance,] + result | |
else : | |
yield [instance,] | |
def create_paths(gremlin_client, comp_path): | |
# Move along each component path and get the instances for each component | |
# and create paths connecting each instance in that path. | |
instances = [] | |
# Get all the instances for each component in the path | |
for component in comp_path: | |
comp_instances = (graph_client.graph_traversal | |
.V().hasLabel("instance") | |
.has("component", component) | |
.toList()) | |
instances.append(comp_instances) | |
return combinations(instances) | |
def infer_paths(gremlin_client, source_comp, sink_comp): | |
# Find the component level path between the supplied source and sink | |
# Need to write a gremlin query to find all the source and sink components | |
# and then find all component level paths between them. | |
comp_paths = [["Sp", "A", "B", "C"]] | |
all_paths = [] | |
# Create instance paths for all components in the component path | |
for comp_path in comp_paths: | |
all_paths.extend(create_paths(gremlin_client, comp_path)) | |
return all_paths | |
def save_paths(paths, filename): | |
with open(filename, "w") as save_file: | |
writer = csv.writer(save_file) | |
writer.writerows(paths) | |
if __name__ == "__main__": | |
import datetime as dt | |
from tests.local.graph_connect import graph_client | |
# Assuming we have some way to find the source and sink components in the | |
# graph - should just be an easy query | |
source_comps = ["Sp"] | |
sink_comps = ["C"] | |
for source_comp in source_comps: | |
for sink_comp in sink_comps: | |
# Use the graph traversal method | |
start = dt.datetime.now() | |
graph_paths = find_all_paths(graph_client, source_comp, sink_comp) | |
stop = dt.datetime.now() | |
save_paths(graph_paths, | |
f"{source_comp}_{sink_comp}_graph_paths.csv") | |
elapsed = (stop-start).total_seconds() | |
print("Using graph traversal:") | |
print(f"Found {len(graph_paths)} paths from instances of component" | |
f" {source_comp} to instances of component {sink_comp}") | |
print(f"Elapsed time: {elapsed} seconds") | |
start2 = dt.datetime.now() | |
inferred_paths = infer_paths(graph_client, source_comp, sink_comp) | |
stop2 = dt.datetime.now() | |
save_paths(graph_paths, | |
f"{source_comp}_{sink_comp}_inferred_paths.csv") | |
elapsed2 = (stop2-start2).total_seconds() | |
print("By inferring paths:") | |
print(f"Found {len(inferred_paths)} paths from instances of " | |
f"component {source_comp} to instances of component " | |
f"{sink_comp}") | |
print(f"Elapsed time: {elapsed2} seconds") | |
print(f"Inferred is {round((elapsed / elapsed2),2)} times faster " | |
f"than the graph traversal") | |
graph_path_set = set(map(tuple, graph_paths)) | |
inferred_paths_set = set(map(tuple, inferred_paths)) | |
print("Differences in the paths found:") | |
print(graph_path_set.symmetric_difference(inferred_paths_set)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment