Skip to content

Instantly share code, notes, and snippets.

@pridkett
Created May 13, 2012 22:57
Show Gist options
  • Save pridkett/2690667 to your computer and use it in GitHub Desktop.
Save pridkett/2690667 to your computer and use it in GitHub Desktop.
Export the network of the FOLLOWER relationship in gitminer
/**
* This script creates a GraphML file containing the FOLLOWER relationships
* among the users associated with a repository in a gitminer database.
*
* @author Patrick Wagstrom <patrick@wagstrom.net>
*/
/**
 * Export the FOLLOWER relationships among a set of users to a GraphML file.
 *
 * Every user vertex is copied into a fresh TinkerGraph, minus its
 * 'biography' property, which takes up a ton of space and is not needed
 * for the analysis. Each outgoing FOLLOWER edge whose target is also in
 * the set is then copied; edges that point outside the set are reported
 * and skipped.
 *
 * @param users: an iterable of user vertices. It is traversed only once,
 *               so a one-shot iterator works too. Followed users that are
 *               not part of this set lose their incoming edges in the
 *               export.
 * @param outfile: path of the GraphML file to write
 */
def exportFollowerSubgraph(users, outfile) {
    // 'def' keeps these local instead of leaking them into the script binding.
    def to = new TinkerGraph()
    // Remember the source vertices so the edge pass does not have to
    // re-iterate 'users' (which may not be re-iterable).
    def sources = []

    System.out.println("processing users")
    for (vertex in users) {
        System.out.println("user: " + vertex["login"])
        def toVertex = to.addVertex(vertex.getId())
        ElementHelper.copyProperties(vertex, toVertex)
        // Drop the property outright rather than storing a null value.
        toVertex.removeProperty('biography')
        sources << vertex
    }

    System.out.println("processing edges")
    for (vertex in sources) {
        System.out.println("edges for user: " + vertex["login"])
        def fromCopy = to.v(vertex.getId())
        for (edge in vertex.outE("FOLLOWER")) {
            if (edge == null) continue
            def target = edge.getInVertex()
            System.out.println("edge: " + vertex["login"] + " => " + target["login"])
            def targetCopy = to.v(target.getId())
            if (targetCopy == null) {
                System.out.println("target vertex is out of set, skipping")
            } else {
                def toEdge = to.addEdge(edge.getId(), fromCopy, targetCopy, edge.getLabel())
                ElementHelper.copyProperties(edge, toEdge)
            }
        }
    }

    System.out.println("Writing graphml file")
    // Write to the requested file and always release the handle, even if
    // serialization fails part-way through.
    def stream = new FileOutputStream(outfile)
    try {
        new GraphMLWriter().outputGraph(to, stream)
    } finally {
        stream.close()
    }
}
/**
 * Collect the users associated with a repository through several
 * different relationships: REPO_WATCHED and REPO_OWNER (incoming edges)
 * and REPO_COLLABORATOR and REPO_CONTRIBUTOR (outgoing edges).
 *
 * A future post will contain more information about how to mine these
 * relationships.
 *
 * @param repo: the vertex of the repository to start at
 * @return the combined collection of watchers, collaborators,
 *         contributors and owners of the repository
 */
def getAllUsersForRepo(repo) {
    // 'def' keeps the intermediate sets local to this call instead of
    // leaking them into the script binding.
    def watchers = repo.in("REPO_WATCHED").toSet()
    def collaborators = repo.out("REPO_COLLABORATOR").toSet()
    // toSet() already removes duplicates, so no separate dedup step is needed.
    def contributors = repo.out("REPO_CONTRIBUTOR").toSet() +
            repo.in("REPO_OWNER").toSet()
    return watchers + collaborators + contributors
}
// Look up the tinkerpop/gremlin repository vertex through the 'repo-idx' index.
def repoHits = g.idx('repo-idx').get('reponame', 'tinkerpop/gremlin')
if (!repoHits.hasNext()) {
    // Fail with a descriptive message instead of a bare NoSuchElementException.
    throw new IllegalStateException("repository 'tinkerpop/gremlin' not found in index 'repo-idx'")
}
gremlin = repoHits.next()
// println "Getting all users..."
// get every user related to gremlin
users = getAllUsersForRepo(gremlin)
// export the follower subgraph, requesting "followers.graphml" as the output file
exportFollowerSubgraph(users, "followers.graphml")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment