Skip to content

Instantly share code, notes, and snippets.

@amcp
Last active April 27, 2018 17:56
Show Gist options
  • Save amcp/0a28320ff5898ea54a78121e69bcfaa9 to your computer and use it in GitHub Desktop.
Save amcp/0a28320ff5898ea54a78121e69bcfaa9 to your computer and use it in GitHub Desktop.
This Groovy script loads the Enron email graph and runs a traversal that finds the shortest path between two nodes.
#!/bin/bash
# Fetch and unpack the Enron email edge list, then start the Gremlin console.
# Stop at the first failing step so the console is not started without the data file.
set -euo pipefail
# -O pins the output name so a leftover archive does not cause wget to save a ".1" copy.
wget -O email-Enron.txt.gz https://snap.stanford.edu/data/email-Enron.txt.gz
# -f overwrites an email-Enron.txt left over from an earlier run instead of refusing.
gunzip -f email-Enron.txt.gz
bin/gremlin.sh #and then :load enron.groovy
// Load the edge list in email-Enron.txt into an in-memory TinkerGraph.
// Every data line holds two whitespace-separated integer node ids: source, then target.
graph = TinkerGraph.open()
graph.createIndex("nodeId", Vertex.class)
// The traversal source does not depend on the line being read, so create it once
// rather than once per input line.
g = graph.traversal()
// nodeId -> vertex, so each node id is turned into a vertex only once.
vertexMap = new HashMap<Integer, Vertex>()
new File("email-Enron.txt").eachLine { final String line ->
    // Skip '#' header lines and blank lines (a blank line has no second token).
    if (line.startsWith("#") || line.trim().isEmpty()) {
        return
    }
    def outin = line.trim().split("\\s+")
    def outV = Integer.valueOf(outin[0])
    def inV = Integer.valueOf(outin[1])
    // Create the endpoint vertices on first sight, reuse them afterwards.
    def source = vertexMap.computeIfAbsent(outV) { id -> graph.addVertex(label, 'node', 'nodeId', id) }
    def target = vertexMap.computeIfAbsent(inV) { id -> graph.addVertex(label, 'node', 'nodeId', id) }
    source.addEdge('similar', target)
}
// Traverser filter that passes only traversers whose path holds at most `hops` elements.
// The element type is a wildcard: the original `Traverser<T>` named no declared type
// variable, so `T` could only resolve to whatever class called T happened to be imported.
class DepthPredicate implements java.util.function.Predicate<Traverser<?>> {
    // Inclusive upper bound on the traverser's path size.
    private final int hops
    // hops: largest path size that still passes the filter.
    public DepthPredicate(int hops) { this.hops = hops }
    // Returns true when the traverser's path size does not exceed the bound.
    // NOTE(review): the comparison is against path().size(), not an edge count; if the
    // path also contains the start vertex, this admits one edge fewer than `hops` - confirm intent.
    public boolean test(Traverser<?> t) { return t.path().size() <= hops }
}
// Endpoints (nodeId values) and depth filter used by the path query below.
start = 775
end = 990
d = new DepthPredicate(5)
// The query itself is left commented out: starting at the vertex whose nodeId is `start`,
// it repeatedly follows outgoing 'similar' edges along simple paths until it reaches the
// vertex with nodeId `end` on a path accepted by `d`, and reports the first such path as nodeIds.
//g.V().has("nodeId", start).repeat(out('similar').simplePath()).until(has('nodeId', end).and().filter(d)).limit(1).path().by('nodeId')
// Open a Titan graph on the in-memory storage backend with storage transactions off
// and batch loading on.
graph = com.thinkaurelius.titan.core.TitanFactory.build().
        set("storage.backend", "inmemory").
        set("storage.transactions", "false").
        set("storage.batch-loading", "true").
        open()
// Declare the schema up front: one vertex label, one edge label, and an Integer
// "nodeId" property key backed by a unique composite index named byNodeId.
mgmt = graph.openManagement()
mgmt.makeVertexLabel("node").make()
mgmt.makeEdgeLabel("similar").make()
nodeIdPropKey = mgmt.makePropertyKey("nodeId").dataType(Integer.class).make()
mgmt.buildIndex('byNodeId', org.apache.tinkerpop.gremlin.structure.Vertex.class).
        addKey(nodeIdPropKey).
        unique().
        buildCompositeIndex()
// Commit the schema changes, then the graph transaction.
mgmt.commit()
graph.tx().commit()
// Load the edge list into the Titan graph, looking each endpoint up by nodeId and
// creating it when it is not there yet.
// The traversal source is the same for every line, so build it once before the loop;
// it is also the `g` used by the query that follows the load.
g = graph.traversal()
// File name matches what gunzip produces from email-Enron.txt.gz (lower-case 'e');
// the previous "Email-Enron.txt" is a different file on case-sensitive filesystems.
new File("email-Enron.txt").eachLine { final String line ->
    // Skip '#' header lines and blank lines (a blank line has no second token).
    if (line.startsWith("#") || line.trim().isEmpty()) {
        return
    }
    def outin = line.trim().split("\\s+")
    // Parse once and use the Integer for both the lookup and the stored property, so the
    // value queried has the same type as the value written and as the key's declared dataType.
    def outV = Integer.valueOf(outin[0])
    def inV = Integer.valueOf(outin[1])
    def outVertex = g.V().has("nodeId", outV).tryNext().
            orElseGet { graph.addVertex(label, 'node', 'nodeId', outV) }
    def inVertex = g.V().has("nodeId", inV).tryNext().
            orElseGet { graph.addVertex(label, 'node', 'nodeId', inV) }
    outVertex.addEdge('similar', inVertex)
}
graph.tx().commit()
// Traverser filter that passes only traversers whose path holds at most `hops` elements.
// The element type is a wildcard: the original `Traverser<T>` named no declared type
// variable, so `T` could only resolve to whatever class called T happened to be imported.
class DepthPredicate implements java.util.function.Predicate<Traverser<?>> {
    // Inclusive upper bound on the traverser's path size.
    private final int hops
    // hops: largest path size that still passes the filter.
    public DepthPredicate(int hops) { this.hops = hops }
    // Returns true when the traverser's path size does not exceed the bound.
    // NOTE(review): the comparison is against path().size(), not an edge count; if the
    // path also contains the start vertex, this admits one edge fewer than `hops` - confirm intent.
    public boolean test(Traverser<?> t) { return t.path().size() <= hops }
}
// Depth filter applied to candidate paths in the query below.
maxDepth = new DepthPredicate(5)
// Reported problem: after the matching path is printed, Gremlin Server pegs the processor
// at 100% and the JVM image size increases without bound; the console prompt takes a very
// long time to come back.
// Query: from nodeId 775, follow outgoing 'similar' edges along simple paths until nodeId 990
// is reached on a path accepted by maxDepth; print the first such path as nodeIds.
// (The stray extra ')' after filter(maxDepth) has been removed so the statement parses.)
g.V().has("nodeId", 775).
        repeat(out('similar').simplePath()).
        until(has('nodeId', 990).and().filter(maxDepth)).
        limit(1).path().by('nodeId')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment