Skip to content

Instantly share code, notes, and snippets.

View bveliqi's full-sized avatar
🤙
wuuzzzzuuuuupppppp?!

Behar Veliqi bveliqi

🤙
wuuzzzzuuuuupppppp?!
View GitHub Profile
// Run PageRank over `graph` with a reset probability of 0.01 for a fixed
// 100 iterations, keeping whatever `run()` returns as `results`.
val results = {
  val configuredPageRank = graph.pageRank
    .resetProbability(0.01)
    .maxIter(100)
  configuredPageRank.run()
}
+------------------+---------------+
| id| screenName|
+------------------+---------------+
|763776167061155842|PaigeJo94076042|
| 104147773| JordanCrane86|
| 207258117| DarrylSparey|
| 2718405648| Clyde5591|
...
+------------------+---------------+
// Vertices: one row per distinct (id, screenName) pair found in `idTable`.
val vertices = idTable
  .select("id", "screenName")
  .dropDuplicates()

// Edges: `id` becomes the source and `friend_id` the destination; rows whose
// destination is null are discarded, and every remaining edge is labelled
// with the constant relationship "follower".
val edges = idTable
  .selectExpr("id as src", "friend_id as dst")
  .filter('dst.isNotNull)
  .withColumn("relationship", lit("follower"))

// Combine both DataFrames into a GraphFrame.
val graph = GraphFrame(vertices, edges)
+----------+------------+------------------+
|id |screenName |friend_id |
+----------+------------+------------------+
|51878493 |_notmichelle|60789485 |
|51878493 |_notmichelle|2420931980 |
|51878493 |_notmichelle|2899776756 |
...
|1393409100|jesseayye |86868062 |
|1393409100|jesseayye |19697415 |
|1393409100|jesseayye |2998836604 |
// Turn the textual `friends` list of every row in `df` into one row per
// friend, producing the columns id, screenName and friend_id.
val relationships = {
  // Only these three columns are needed from the raw data.
  val selected = df.select("id", "screenName", "friends")

  // Remove the "[ " / " ]" list delimiters, then split the remainder on "; ".
  val withFriendArray = selected
    .withColumn("friends", split(regexp_replace('friends, "\\[ | ]", ""), "; "))

  // One output row per array element. `explode` is kept as the direct
  // argument of withColumn; the quote stripping happens in a separate step.
  val onePerFriend = withFriendArray
    .withColumn("friend_id", explode('friends))
    .withColumn("friend_id", regexp_replace('friend_id, "\"", ""))

  // Cast both identifiers to LongType and discard the intermediate array.
  onePerFriend
    .withColumn("id", 'id.cast(LongType))
    .withColumn("friend_id", 'friend_id.cast(LongType))
    .drop("friends")
}
import org.graphframes._
// Ask the notebook's dependency loader `z` to load the GraphFrames artifact
// given as "group:artifact:version" (presumably run in Zeppelin's dependency
// paragraph before the Spark interpreter starts - confirm for your setup).
// The coordinate must be delimited by plain ASCII double quotes; typographic
// quotes are not valid Scala string delimiters.
z.load("graphframes:graphframes:0.5.0-spark2.1-s_2.11")
# Rewrite every `", "` sequence in twitter_friends.csv to `"; "`, editing the
# file in place (-i). Presumably this keeps the commas inside the bracketed
# lists from being read as CSV field separators - verify against the data.
sed -i -e 's/", "/"; "/g' twitter_friends.csv
id,screenName,tags,avatar,followersCount,friendsCount,lang,lastSeen,tweetId,friends
"51878493","_notmichelle",[ "#nationaldogday" ],"http://pbs.twimg.com/profile_images/761977602173046786/4_utEHsD_normal.jpg",275,115,"en",1472270622663,"769309490038439936",[ "60789485"; "2420931980"; "2899776756"; "127410795"; "38747286"; "1345516880"; "236076395"; "1242946609"; "2567887488"; "280777286"; "2912446303"; "1149916171"; "3192577639"; "239569380"; "229974168"; "389097282"; "266336410"; "1850301204"; "2364414805"; "812302213"; "2318240348"; "158634793"; "542282350"; "569664772"; "766573472"; "703551325"; "168564432"; "261054460"; "402980453"; "562547390"; "539630318"; "165167145"; "22216387"; "427568285"; "61033129"; "213519434"; "373092437"; "170762012"; "273601960"; "322108757"; "1681816280"; "357843027"; "737471496"; "406541143"; "1084122632"; "633477616"; "537821327"; "793079732"; "2386380799"; "479015607"; "783354019"; "365171478"; "625002575"; "2326207404"; "1653286842"; "1676964216"; "2296617326"; "158369219
// Read twitter_friends.csv into a DataFrame with the "header" option enabled.
val df = {
  val csvReader = spark.read.option("header", true)
  csvReader.csv("/path/to/twitter_friends.csv")
}