Behar Veliqi bveliqi

## pagerank.scala
// Run PageRank over `graph` with a reset probability of 0.01 and a fixed
// budget of 100 iterations; the outcome of run() is kept in `results`.
val results = {
  val pageRank = graph.pageRank
    .resetProbability(0.01)
    .maxIter(100)
  pageRank.run()
}

## gist:7f089715cca9e46f91516b83cd11cef8
+------------------+---------------+
|                id|     screenName|
+------------------+---------------+
|763776167061155842|PaigeJo94076042|
|         104147773|  JordanCrane86|
|         207258117|   DarrylSparey|
|        2718405648|      Clyde5591|
                  ...
+------------------+---------------+

## graph.scala
// Vertices: one row per distinct (id, screenName) pair found in idTable.
val vertices = idTable
  .select($"id", $"screenName")
  .distinct()

// Edges: every (id, friend_id) row becomes an edge from `src` (the user) to
// `dst` (the friend). Rows without a friend id are dropped, and each remaining
// edge is labelled with the constant "follower" in a `relationship` column.
val edges = idTable
  .select($"id".as("src"), $"friend_id".as("dst"))
  .filter($"dst".isNotNull)
  .withColumn("relationship", lit("follower"))

// Assemble the GraphFrame from the two DataFrames above.
val graph = GraphFrame(vertices, edges)

## gist:6d2f648e9e7fca04f45e157342f572a7
+----------+------------+------------------+
|id        |screenName  |friend_id         |
+----------+------------+------------------+
|51878493  |_notmichelle|60789485          |
|51878493  |_notmichelle|2420931980        |
|51878493  |_notmichelle|2899776756        |
                ...
|1393409100|jesseayye   |86868062          |
|1393409100|jesseayye   |19697415          |
|1393409100|jesseayye   |2998836604        |

## relationsship.scala
// Flatten the `friends` string of every user into one row per friend.
//   1. remove every "[ " and " ]" from the string and split it on "; "
//   2. explode the resulting array into one `friend_id` row per element
//   3. remove the double quotes still present in each element
//   4. cast `id` and `friend_id` to LongType and drop the helper column
// Result columns: id, screenName, friend_id.
val relationships = df
  .select($"id", $"screenName", $"friends")
  .withColumn("friends", split(regexp_replace($"friends", "\\[ | ]", ""), "; "))
  .withColumn("friend_id", explode($"friends"))
  .withColumn("friend_id", regexp_replace($"friend_id", "\"", ""))
  .withColumn("id", $"id".cast(LongType))
  .withColumn("friend_id", $"friend_id".cast(LongType))
  .drop("friends")

## gist:3172877df1ba7d0f78b41d3f4a2a7e94
import org.graphframes._

## gist:e1ff1db0ee17353a7c1e2d69b83ad1c6
// Load the GraphFrames artifact (group:artifact:version coordinate) into the notebook.
// NOTE(review): `z` is presumably Zeppelin's dependency context (%dep paragraph) — confirm.
// The coordinate must be delimited by plain ASCII double quotes; the typographic
// quotes that were here before are not a valid Scala string literal.
z.load("graphframes:graphframes:0.5.0-spark2.1-s_2.11")

## gist:ddb12ac1626b25e59c0073958b10877b
# Rewrite twitter_friends.csv in place, replacing every occurrence of `", "`
# with `"; "`.
# NOTE(review): presumably this keeps the commas inside the bracketed lists from
# being read as CSV field separators — the sample row uses "; " inside its
# friends list. Confirm against the raw export.
sed -i -e 's/", "/"; "/g' twitter_friends.csv

## gist:23e7d31008b69e2a50c5965bacbe93a2
id,screenName,tags,avatar,followersCount,friendsCount,lang,lastSeen,tweetId,friends
"51878493","_notmichelle",[ "#nationaldogday" ],"http://pbs.twimg.com/profile_images/761977602173046786/4_utEHsD_normal.jpg",275,115,"en",1472270622663,"769309490038439936",[ "60789485"; "2420931980"; "2899776756"; "127410795"; "38747286"; "1345516880"; "236076395"; "1242946609"; "2567887488"; "280777286"; "2912446303"; "1149916171"; "3192577639"; "239569380"; "229974168"; "389097282"; "266336410"; "1850301204"; "2364414805"; "812302213"; "2318240348"; "158634793"; "542282350"; "569664772"; "766573472"; "703551325"; "168564432"; "261054460"; "402980453"; "562547390"; "539630318"; "165167145"; "22216387"; "427568285"; "61033129"; "213519434"; "373092437"; "170762012"; "273601960"; "322108757"; "1681816280"; "357843027"; "737471496"; "406541143"; "1084122632"; "633477616"; "537821327"; "793079732"; "2386380799"; "479015607"; "783354019"; "365171478"; "625002575"; "2326207404"; "1653286842"; "1676964216"; "2296617326"; "158369219

## readcsv.scala
// Load the friends CSV into a DataFrame with the "header" option enabled,
// so the first line of the file is used for the column names.
val df = spark.read.option("header", true).csv("/path/to/twitter_friends.csv")
	// PageRank over `graph`: reset probability 0.01, 100 iterations.
	val results = graph.pageRank.resetProbability(0.01).maxIter(100).run()
	+------------------+---------------+
	|                id|     screenName|
	+------------------+---------------+
	|763776167061155842|PaigeJo94076042|
	|         104147773|  JordanCrane86|
	|         207258117|   DarrylSparey|
	|        2718405648|      Clyde5591|
	...
	+------------------+---------------+
	// Vertices are the distinct (id, screenName) pairs of idTable.
	val vertices = idTable.select("id", "screenName").distinct()

	// Edges run from a user (`src`) to each of their friends (`dst`); rows with a
	// null `dst` are removed and all edges get the constant label "follower".
	val edges = idTable
	  .selectExpr("id as src", "friend_id as dst")
	  .filter('dst.isNotNull)
	  .withColumn("relationship", lit("follower"))

	// Build the GraphFrame from both DataFrames.
	val graph = GraphFrame(vertices, edges)
	+----------+------------+------------------+
	|id        |screenName  |friend_id         |
	+----------+------------+------------------+
	|51878493  |_notmichelle|60789485          |
	|51878493  |_notmichelle|2420931980        |
	|51878493  |_notmichelle|2899776756        |
	...
	|1393409100|jesseayye   |86868062          |
	|1393409100|jesseayye   |19697415          |
	|1393409100|jesseayye   |2998836604        |
	// Flatten the `friends` string of every user into one row per friend:
	// remove every "[ " and " ]", split on "; ", explode into `friend_id`, remove
	// the remaining double quotes, cast both ids to LongType and drop `friends`.
	// The regex alternation must be a bare `|`: `\|` is not a valid escape in a
	// Scala string literal and would stop this snippet from compiling.
	val relationships = df.select("id", "screenName", "friends")
	  .withColumn("friends", regexp_replace('friends, "\\[ | ]", ""))
	  .withColumn("friends", split('friends, "; "))
	  .withColumn("friend_id", explode('friends))
	  .withColumn("friend_id", regexp_replace('friend_id, "\"", ""))
	  .withColumn("id", 'id.cast(LongType))
	  .withColumn("friend_id", 'friend_id.cast(LongType))
	  .drop("friends")
	// Read the friends CSV with the "header" option switched on.
	val df = {
	  val reader = spark.read.option("header", true)
	  reader.csv("/path/to/twitter_friends.csv")
	}