# Orbifold/GraphFrames.py

## GraphFrames.py


import os

import pyspark
from pyspark import SparkContext, SparkConf, SQLContext
from pyspark.sql import SparkSession
# Bring up a local Spark driver, register the GraphFrames jar, and create the
# SQLContext used to build the DataFrames below.
conf = SparkConf().setMaster("local")
# getOrCreate hands back a context that is already running instead of raising,
# so the script can be re-run in the same interpreter. An existing context
# keeps its own configuration.
sc = SparkContext.getOrCreate(conf)
spark = SparkSession.builder.appName('Noether').getOrCreate()

# "~" is shell shorthand, not a real directory name, so expand it in Python
# before handing the path over.
# NOTE(review): addPyFile only ships the file to the Python side; the JVM
# presumably still needs this jar (--jars / --packages / spark.jars) - confirm
# against how the script is launched.
sc.addPyFile(os.path.expanduser("~/Downloads/graphframes-0.5.0-spark2.1-s_2.11.jar"))

# Kept after addPyFile: presumably the graphframes package is importable only
# once the jar has been registered - confirm.
from graphframes import *
# SQLContext's first parameter is the SparkContext; the session is passed
# separately through sparkSession.
sqlContext = SQLContext(sc, sparkSession=spark)
# Seven-person example graph.
# NOTE: v, e and g are rebound to a smaller graph further down in this file.

# Vertex DataFrame: one (id, name, age) row per person.
_people_rows = [
    ("a", "Alice", 34),
    ("b", "Bob", 36),
    ("c", "Charlie", 30),
    ("d", "David", 29),
    ("e", "Esther", 32),
    ("f", "Fanny", 36),
    ("g", "Gabby", 60),
]
v = sqlContext.createDataFrame(_people_rows, ["id", "name", "age"])

# Edge DataFrame: one (src, dst, relationship) row per directed link.
_link_rows = [
    ("a", "b", "friend"),
    ("b", "c", "follow"),
    ("c", "b", "follow"),
    ("f", "c", "follow"),
    ("e", "f", "follow"),
    ("e", "d", "friend"),
    ("d", "a", "friend"),
    ("a", "e", "friend"),
]
e = sqlContext.createDataFrame(_link_rows, ["src", "dst", "relationship"])

# Combine both DataFrames into a GraphFrame.
g = GraphFrame(v, e)


# Three-person graph; this is the graph the queries below run against.

# Vertex DataFrame: one (id, name, age) row per person.
_small_people_rows = [
    ("a", "Alice", 34),
    ("b", "Bob", 36),
    ("c", "Charlie", 30),
]
v = sqlContext.createDataFrame(_small_people_rows, ["id", "name", "age"])

# Edge DataFrame with "src" and "dst" columns plus a relationship label.
_small_link_rows = [
    ("a", "b", "friend"),
    ("b", "c", "follow"),
    ("c", "b", "follow"),
]
e = sqlContext.createDataFrame(
    _small_link_rows, ["src", "dst", "relationship"]
)

# Combine both DataFrames into a GraphFrame.
from graphframes import *
g = GraphFrame(v, e)

# Query: Get in-degree of each vertex.
g.inDegrees.show()

# Query: Count the number of "follow" connections in the graph.
# count() only returns the number; nothing echoes it when this runs as a
# script, so bind it and print it.
follow_count = g.edges.filter("relationship = 'follow'").count()
print(follow_count)

# Run PageRank algorithm, and show results.
results = g.pageRank(resetProbability=0.01, maxIter=20)
results.vertices.select("id", "pagerank").show()


	import pyspark
	from pyspark import SparkContext, SparkConf, SQLContext
	from pyspark.sql import SparkSession
	conf = SparkConf().setMaster("local")
	sc = SparkContext(conf=conf)
	spark = SparkSession.builder.appName('Noether').getOrCreate()

	sc.addPyFile("~/Downloads/graphframes-0.5.0-spark2.1-s_2.11.jar")

	from graphframes import *
	sqlContext = SQLContext(spark)
	v = sqlContext.createDataFrame([
	("a", "Alice", 34),
	("b", "Bob", 36),
	("c", "Charlie", 30),
	("d", "David", 29),
	("e", "Esther", 32),
	("f", "Fanny", 36),
	("g", "Gabby", 60)
	], ["id", "name", "age"])
	# Edge DataFrame
	e = sqlContext.createDataFrame([
	("a", "b", "friend"),
	("b", "c", "follow"),
	("c", "b", "follow"),
	("f", "c", "follow"),
	("e", "f", "follow"),
	("e", "d", "friend"),
	("d", "a", "friend"),
	("a", "e", "friend")
	], ["src", "dst", "relationship"])
	# Create a GraphFrame
	g = GraphFrame(v, e)


	v = sqlContext.createDataFrame([
	("a", "Alice", 34),
	("b", "Bob", 36),
	("c", "Charlie", 30),
	], ["id", "name", "age"])
	# Create an Edge DataFrame with "src" and "dst" columns
	e = sqlContext.createDataFrame([
	("a", "b", "friend"),
	("b", "c", "follow"),
	("c", "b", "follow"),
	], ["src", "dst", "relationship"])
	# Create a GraphFrame
	from graphframes import *
	g = GraphFrame(v, e)

	# Query: Get in-degree of each vertex.
	g.inDegrees.show()

	# Query: Count the number of "follow" connections in the graph.
	g.edges.filter("relationship = 'follow'").count()

	# Run PageRank algorithm, and show results.
	results = g.pageRank(resetProbability=0.01, maxIter=20)
	results.vertices.select("id", "pagerank").show()