Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import java.io.Serializable
import org.apache.spark.HashPartitioner
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.DoubleType
import org.graphframes.GraphFrame
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
/**
 * Mutable attribute bundle attached to an edge.
 *
 * All three fields default to empty arrays and are declared `var`, so they can be
 * reassigned after construction.
 *
 * NOTE(review): the field names suggest destination-neighbour ids plus alias-sampling
 * tables (`J`, `q`) for a biased random walk, but nothing in this file populates
 * them — confirm the intended semantics against the code that fills them in.
 *
 * @param dstNeighbors array of `Long` values; empty by default
 * @param J            array of `Int` values; empty by default
 * @param q            array of `Double` values; empty by default
 */
case class EdgeAttr(
    var dstNeighbors: Array[Long] = Array.empty[Long],
    var J: Array[Int] = Array.empty[Int],
    var q: Array[Double] = Array.empty[Double]
) extends Serializable
/**
 * Builds a GraphFrame from a weighted edge list read from `data/input.csv`.
 *
 * Steps performed by the body, in order:
 *  1. Start a local Spark session.
 *  2. Read the CSV and name its three columns `id`, `dst`, `weight`.
 *  3. Build the vertex DataFrame: one row per `id`, carrying its `[dst, weight]`
 *     pairs sorted by descending weight and truncated to `bcMaxDegree` entries.
 *  4. Build the edge DataFrame (`src`, `dst`, `info`) where `info` is an empty
 *     [[EdgeAttr]] literal.
 *  5. Assemble the GraphFrame and force evaluation of its vertices.
 *
 * NOTE(review): Spark's documentation advises defining `main()` rather than
 * extending `scala.App`. `App` is kept here so that the object's members stay
 * accessible as before, but every `val` below must be declared before its first
 * use: fields of an object are initialised top to bottom, and a forward reference
 * silently reads `null`.
 */
object RandomWalk extends App {
  val spark: SparkSession = SparkSession
    .builder
    .master("local[*]")
    .appName("RandomWalk")
    .getOrCreate()

  import spark.implicits._

  // Upper bound on the number of outgoing neighbours kept per vertex.
  val bcMaxDegree: Broadcast[PartitionID] = spark.sparkContext.broadcast(40)

  val inputFilePath = "data/input.csv"

  val inputTripletsDF: DataFrame = spark.read
    .option("inferSchema", true)
    .option("header", true)
    .csv(inputFilePath)
    .toDF("id", "dst", "weight")

  /**
   * Orders the `[dst, weight]` pairs of one vertex by descending weight and keeps
   * at most `bcMaxDegree.value` of them.
   *
   * @param combined pairs as produced by `array("dst", "weight")`; index 0 is the
   *                 destination id and index 1 is the weight
   * @return the heaviest pairs first, truncated to the configured maximum degree
   */
  def sortAndCutDsts(combined: mutable.WrappedArray[mutable.WrappedArray[Int]]): Array[Array[Int]] =
    combined.toArray
      .map(_.toArray)
      .sortBy(pair => -pair(1)) // index 1 is the weight; the pairs only have two slots
      .slice(0, bcMaxDegree.value)

  // Must be initialised before node2attrDf, which applies it.
  val sortAndCutDstsUdf: UserDefinedFunction = udf(sortAndCutDsts _)

  // Vertex table: GraphFrame requires a column named "id".
  // NOTE(review): only ids that occur as a source get a vertex row; ids that occur
  // solely as `dst` are absent — confirm this is intended.
  val node2attrDf: DataFrame = inputTripletsDF
    .withColumn("combined", array("dst", "weight"))
    .drop("dst", "weight")
    .groupBy("id")
    .agg(collect_list(col("combined")))
    .toDF("id", "combined")
    .withColumn("combined", sortAndCutDstsUdf($"combined"))

  // Edge table: GraphFrame requires columns named "src" and "dst".
  val edge2attrDf = inputTripletsDF
    .drop("weight")
    .withColumn("info", typedLit(EdgeAttr()))
    .toDF("src", "dst", "info")

  val gDF = GraphFrame(node2attrDf, edge2attrDf)

  // GraphFrame itself exposes no `first`; trigger evaluation through its vertices.
  gDF.vertices.first()
}
id dst weight
780 725 1
266 351 2
285 132 1
328 748 7
275 487 1
259 938 2
1474 427 1
208 66 1
925 97 1
194 237 1
323 275 2
436 82 1
83 76 1
385 660 1
355 898 1
528 1136 1
302 895 2
378 481 1
124 221 1
470 433 1
1047 625 1
274 228 1
19 251 1
117 467 1
133 175 1
274 117 2
100 427 1
412 122 1
820 111 1
137 423 1
15 143 2
864 1047 1
591 1016 3
8 87 4
317 435 2
84 418 1
928 24 1
70 661 1
900 262 1
56 323 1
937 350 1
95 98 1
79 617 1
939 504 1
155 393 2
28 185 1
129 544 1
692 208 1
14 508 4
752 331 1
291 601 1
66 416 1
369 925 1
316 252 1
284 367 1
355 350 1
302 315 3
505 173 1
32 811 1
87 783 1
1194 1 1
178 190 1
1014 126 1
652 469 1
924 274 1
156 644 1
161 1046 1
179 221 2
518 31 1
202 70 1
165 850 1
663 183 2
520 191 2
721 161 1
520 327 1
248 504 1
170 185 1
1048 1057 1
70 81 2
1375 224 1
455 521 1
206 433 1
718 124 1
118 845 3
178 430 2
172 607 1
204 22 1
154 708 1
1086 508 1
934 633 1
97 70 2
316 288 2
505 745 1
664 712 1
25 402 1
742 257 1
193 836 1
275 123 1
529 1050 1
343 334 2
101 549 1
173 431 1
239 265 1
186 135 1
813 42 1
218 173 1
135 194 1
68 231 1
271 165 1
324 9 1
159 1042 1
1 58 2
116 235 2
499 229 1
63 940 1
214 91 1
319 310 1
780 772 1
432 154 1
125 99 1
418 69 1
204 72 1
367 26 1
81 133 1
413 222 1
694 113 1
627 142 1
55 649 1
205 28 1
152 88 1
661 7 1
228 195 1
154 746 1
1664 1385 1
431 942 1
275 197 2
87 24 1
470 128 1
216 1119 1
200 356 1
94 812 1
403 201 1
15 591 2
143 432 1
133 1396 1
431 195 1
195 427 3
640 44 1
1267 79 1
294 301 6
8 646 1
483 356 1
86 963 1
273 742 2
455 250 1
42 429 1
5 39 1
64 421 1
501 584 1
499 478 1
194 96 4
268 991 1
655 157 1
471 546 3
178 520 2
242 340 2
99 421 1
482 295 1
645 792 1
636 132 1
614 502 1
191 617 1
528 1252 1
157 475 1
223 12 1
83 121 1
155 255 1
143 506 1
269 340 6
566 746 2
880 301 1
218 396 2
181 568 5
38 403 1
479 1547 1
739 226 1
98 230 1
8 194 2
100 936 1
47 163 1
640 1267 1
160 269 1
116 311 1
515 661 1
607 59 1
169 81 1
167 448 1
1137 100 1
492 133 1
715 458 2
53 751 1
717 696 1
187 661 1
648 385 1
23 11 2
582 447 1
83 1021 1
428 410 1
800 332 1
175 188 1
1558 1193 1
1028 108 1
71 511 1
311 268 1
513 607 1
1007 421 1
410 111 1
283 279 1
433 134 1
73 239 1
202 461 1
22 176 10
151 276 6
121 251 1
619 148 1
529 172 2
568 566 2
153 176 1
462 170 2
1368 221 1
118 728 1
1315 1014 1
653 195 1
12 205 1
1084 169 1
1105 312 1
185 519 1
934 756 1
558 64 1
210 82 6
597 550 1
179 501 1
836 1039 1
92 82 2
516 732 1
332 705 1
169 228 1
576 367 1
1203 954 1
415 1053 1
908 358 1
124 611 1
108 1048 1
825 576 1
239 1069 1
550 823 1
32 135 1
292 515 1
479 451 2
663 524 1
43 399 1
711 185 1
29 720 1
195 216 3
469 190 1
324 508 1
97 588 1
260 899 1
183 427 5
1126 225 1
974 82 1
423 504 1
100 347 1
319 187 2
176 187 4
86 658 1
144 1269 1
162 98 1
1137 1007 1
132 606 1
144 140 1
632 276 1
81 114 1
102 1444 1
872 259 1
946 1078 1
708 169 1
1095 815 1
648 692 2
295 19 1
318 496 4
265 196 3
673 199 1
1210 38 1
470 31 1
558 216 1
1450 521 1
403 82 2
223 317 2
260 329 1
530 48 2
293 11 1
195 276 2
603 179 1
472 943 1
414 566 1
1009 248 1
536 248 1
204 94 1
544 919 2
227 135 2
100 159 2
137 147 1
319 895 1
303 346 2
454 216 1
289 312 1
199 213 2
257 1142 1
402 121 1
503 403 1
692 812 1
425 168 1
173 549 1
284 117 2
402 447 1
180 134 3
53 432 2
240 153 1
166 113 1
332 329 1
189 176 2
656 1195 1
191 518 1
79 709 1
69 650 1
321 64 1
562 62 1
657 216 4
246 50 4
1189 480 1
651 196 1
90 393 1
112 50 1
507 523 1
762 980 1
84 184 1
47 52 1
451 855 1
607 172 2
222 202 3
79 839 1
960 511 1
799 1020 1
708 1232 1
269 45 1
307 328 6
678 329 1
23 156 2
724 511 1
79 655 2
270 1243 1
71 624 2
553 241 2
258 15 1
423 591 1
117 210 2
652 195 1
50 173 7
322 902 1
306 259 1
596 470 1
1039 155 1
366 755 1
292 242 1
679 966 1
325 289 1
22 463 1
204 1446 1
98 673 1
193 1103 1
248 14 1
232 755 1
514 176 3
451 403 1
234 33 1
197 187 1
70 443 2
642 88 1
257 254 2
497 721 1
222 518 1
116 13 2
79 187 4
245 687 1
197 111 1
302 508 1
597 1001 1
434 558 1
254 183 1
222 184 1
646 515 1
404 234 1
277 100 1
488 340 1
157 462 1
249 533 1
286 242 3
678 257 1
385 195 3
408 28 2
281 274 2
154 512 1
525 527 1
751 331 1
270 272 1
722 1425 1
276 19 2
331 1176 1
209 16 1
172 156 3
195 69 4
693 462 1
732 356 1
882 313 1
935 257 1
315 1176 2
423 109 1
135 638 1
483 478 1
467 965 1
33 403 1
740 1163 1
879 100 1
494 194 3
419 200 1
382 42 1
171 109 1
427 135 4
321 408 1
194 134 2
512 47 1
143 195 3
1125 173 1
39 50 1
213 582 1
524 692 1
318 127 1
378 458 1
659 161 1
825 137 1
13 30 1
147 117 3
403 479 1
631 293 1
479 200 1
751 326 1
156 253 1
514 174 1
59 794 1
730 22 1
89 134 3
472 826 1
257 283 1
421 207 2
405 1226 1
228 601 1
815 403 1
504 651 1
476 280 1
393 358 1
720 400 1
86 213 2
136 530 1
660 684 1
19 284 1
301 328 4
222 501 1
169 143 2
42 135 1
208 675 1
282 950 1
28 86 4
625 417 1
655 491 1
752 288 1
330 922 1
203 408 1
1012 1278 1
435 26 1
189 69 1
136 431 1
89 218 1
249 298 1
265 525 1
175 22 1
69 511 2
1194 42 1
1125 1286 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.