Skip to content

Instantly share code, notes, and snippets.

@jovianlin
Created April 21, 2017 13:34
Show Gist options
  • Save jovianlin/c145057b64bf7b1e0e916cc985e96534 to your computer and use it in GitHub Desktop.
Save jovianlin/c145057b64bf7b1e0e916cc985e96534 to your computer and use it in GitHub Desktop.
Sample code for PySpark: running PageRank with GraphFrames
from pyspark import SparkContext
from pyspark.sql import HiveContext
from graphframes.examples import Graphs
# Example script: run PageRank on the GraphFrames "friends" example graph in
# three configurations (until convergence, fixed iterations, personalized).
sc = SparkContext()
try:
    # Keep Spark's own logging down to errors so the example output is readable.
    sc.setLogLevel("ERROR")
    sqlContext = HiveContext(sc)
    g = Graphs(sqlContext).friends()  # Get example graph

    print('Run PageRank until convergence to tolerance "tol".')
    results = g.pageRank(resetProbability=0.15, tol=0.01)
    print()

    print('# Display resulting pageranks and final edge weights')
    # DataFrame.show() prints the table itself and returns None, so it is
    # called directly; wrapping it in print() would emit a stray "None" line.
    results.vertices.select("id", "pagerank").show()
    print()
    results.edges.select("src", "dst", "weight").show()
    print()

    print('# Run PageRank for a fixed number of iterations.')
    results2 = g.pageRank(resetProbability=0.15, maxIter=10)
    print(results2)
    print()

    print('# Run PageRank personalized for vertex "a"')
    results3 = g.pageRank(resetProbability=0.15, maxIter=10, sourceId="a")
    print(results3)
    print()
finally:
    # Always release the Spark context, even if one of the jobs above fails.
    sc.stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment