Created
February 21, 2014 22:18
-
-
Save l1x/9144794 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Source: http://amplab.github.io/graphx/
//
// GraphX example script: joins per-user attributes onto a follower graph,
// restricts the graph to users with exactly two attributes, runs PageRank on
// that subgraph and prints the five highest-ranked users.

// Connect to the Spark cluster
val sc = new SparkContext("spark://master.amplab.org", "research")

// Load the user data and parse each line into a (user id, attribute list) tuple.
// The input is a .tsv file, so fields are split on tabs. The id is converted
// to Long because GraphX vertex ids are 64-bit integers and the join below
// matches on that key.
val users = sc.textFile("hdfs://user_attributes.tsv")
  .map(line => line.split("\t"))
  .map(parts => (parts.head.toLong, parts.tail))

// Parse the edge data which is already in userId -> userId format
val followerGraph = Graph.textFile(sc, "hdfs://followers.tsv")

// Attach the user attributes
val graph = followerGraph.outerJoinVertices(users) {
  case (uid, deg, Some(attrList)) => attrList
  // Some users may not have attributes so we set them as empty
  case (uid, deg, None) => Array.empty[String]
}

// Restrict the graph to users which have exactly two attributes
val subgraph = graph.subgraph((vid, attr) => attr.size == 2)

// Compute the PageRank
val pagerankGraph = Analytics.pagerank(subgraph)

// Pair every user's attributes with their PageRank. The attributes are turned
// into a List so that they print readably (an Array prints as an object
// reference).
val userInfoWithPageRank = subgraph.outerJoinVertices(pagerankGraph.vertices) {
  case (uid, attrList, Some(pr)) => (pr, attrList.toList)
  // No rank was computed for this vertex: fall back to a rank of zero.
  case (uid, attrList, None) => (0.0, attrList.toList)
}

// Print the five users with the highest PageRank, one per line. An explicit
// ordering on the rank is required: the vertex tuple has no implicit Ordering.
println(userInfoWithPageRank.vertices.top(5)(Ordering.by(_._2._1)).mkString("\n"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment