Ankur Dave ankurdave

## test-py.html
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<!-- Created by htmlize-1.51 in inline-css mode. -->
<html>
  <head>
    <title>test.py</title>
  </head>
  <body style="color: #DCDCCC; background-color: #3F3F3F;">
    <pre>
<span style="color: #F0DFAF; font-weight: bold;">def</span> <span style="color: #93E0E3;">test1</span>(<span style="color: #fff099;">a</span>, <span style="color: #c2b6e1;">b</span>):
    <span style="color: #F0DFAF; font-weight: bold;">return</span> <span style="color: #fff099;">a</span> + <span style="color: #c2b6e1;">b</span>

## juju-demo.sh
# Install the juju command-line client via Homebrew.
brew install juju
# Run juju's config generation step before bootstrapping.
juju generate-config
# Export AWS credentials into the environment for the commands below.
# NOTE(review): the `...` values look like placeholders to be replaced with real keys.
export AWS_ACCESS_KEY_ID=...
export AWS_SECRET_ACCESS_KEY=...
# Bootstrap, then query status.
juju bootstrap
juju status

# Deploy juju-gui with placement `--to 0` (presumably machine 0 — confirm), then expose it.
juju deploy juju-gui --to 0
juju expose juju-gui
# Wait until `juju status` shows it has started

## spark-nested-groupBy.scala
val rdd = sc.parallelize((0 until 1000).map(x => (1, x)) ++ List((2,1), (2,2)))
// rdd: org.apache.spark.rdd.RDD[(Int, Int)]
rdd.collect
// res1: Array[(Int, Int)] = Array((1,0), (1,1), (1,2), (1,3), (1,4), (1,5), (1,6), (1,7), (1,8), (1,9), (1,10), (1,11), (1,12), (1,13), (1,14), (1,15), (1,16), (1,17), (1,18), (1,19), (1,20), (1,21), (1,22), (1,23), (1,24), (1,25), (1,26), (1,27), (1,28), (1,29), (1,30), (1,31), (1,32), (1,33), (1,34), (1,35), (1,36), (1,37), (1,38), (1,39), (1,40), (1,41), (1,42), (1,43), (1,44), (1,45), (1,46), (1,47), (1,48), (1,49), (1,50), (1,51), (1,52), (1,53), (1,54), (1,55), (1,56), (1,57), (1,58), (1,59), (1,60), (1,61), (1,62), (1,63), (1,64), (1,65), (1,66), (1,67), (1,68), (1,69), (1,70), (1,71), (1,72), (1,73), (1,74), (1,75), (1,76), (1,77), (1,78), (1,79), (1,80), (1,81), (1,82), (1,83), (1,84), (1,85), (1,86), (1,87), (1,88), (1,89), (1,90), (1,91), (1,92), (1,93), (1,94), (1,95), (1,96),...
val nestedGroups = rdd.groupBy(kv => (kv._1, kv._2 % 10)).groupBy(_._1._1).map(_._2

## graphx-bfs.scala
import org.apache.spark.graphx._

/**
 * Returns the shortest directed-edge path from src to dst in the graph. If no path exists, returns
 * the empty list.
 */
def bfs[VD, ED](graph: Graph[VD, ED], src: VertexId, dst: VertexId): Seq[VertexId] = {
  if (src == dst) return List(src)

  // The attribute of each vertex is (dist from src, id of vertex with dist-1)

## A.java
/**
 * Minimal generic holder for a single value of type {@code T}.
 */
public class A<T> {
    /** The held value; publicly readable and writable. */
    public T t;

    /**
     * Creates a holder around the given value.
     *
     * @param t value stored in the public field {@link #t}
     */
    public A(T t) {
        this.t = t;
    }

    /**
     * Writes one line to standard output: the simple name of the held value's runtime class,
     * a single space, and then the value itself.
     */
    public void print() {
        final String typeName = t.getClass().getSimpleName();
        System.out.println(typeName + " " + t);
    }
}

## 2015-08-12-graphframes-demo.scala
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.DataFrame

val conf = new SparkConf()
val sc = new SparkContext("local", "test")
val sqlContext = new SQLContext(sc)

val v = sqlContext.createDataFrame(List(

## build.sbt
// Project name.
name := "my-project"

// Project version.
version := "0.1-SNAPSHOT"

// Organization identifier for the project.
organization := "com.example"

// Scala version for the build.
scalaVersion := "2.10.4"

// Adds spark-core 1.2.1 to the dependencies; `%%` selects the artifact built for this Scala binary version.
libraryDependencies += "org.apache.spark" %% "spark-core" % "1.2.1"

## subgraphWithNeighbors.scala
import org.apache.spark.rdd.RDD
import org.apache.spark.graphx._
import scala.reflect.ClassTag

/** Returns the subgraph of `graph` containing only `vertices` and their neighbors. */
def subgraphWithNeighbors[VD, ED: ClassTag, A: ClassTag](
    graph: Graph[VD, ED], vertices: RDD[(VertexId, A)]): Graph[VD, ED] = {
  // Label each vertex in graph with true if it is a member of `vertices` and false if not
  val labeledGraph = graph.outerJoinVertices(vertices) {
    (id, oldAttr, isSampled) => isSampled.nonEmpty

## fringe-set.scala
// Depends on AllPairsShortestPaths: https://github.com/apache/spark/pull/3619

import org.apache.spark.graphx._
import org.apache.spark.graphx.lib._

// Ten edges i -> i + 1 for i in 0 until 10, each with edge attribute 1.
val edges = sc.parallelize(for (i <- 0 until 10) yield Edge(i, i + 1, 1))
// Build the graph from the edges alone, using 1 as the default vertex attribute.
val graph = Graph.fromEdges(edges, 1)

// Run all-pairs shortest paths and cache the result.
val dists = AllPairsShortestPaths.run(graph).cache()
// For each key, keep the largest second component found among its values.
val maxDists = dists.mapValues(entry => entry._2).reduceByKey {
  (left, right) => if (left > right) left else right
}

## gist:cb89391101e4e87497ae
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.expressions.Row
import scala.reflect.ClassTag

/**
 * Takes vertices and edge pairs and builds a graph.
 *
 * Every `(src, dst)` pair in `edges` becomes `Edge(src, dst, ())`. The attribute is written as the
 * unit value `()` because `Unit` on its own names the companion object, not a value of type
 * `Unit`.
 *
 * @param vertices (vertex id, row attribute) pairs
 * @param edges (source id, destination id) pairs
 * @return the graph constructed from `vertices` and the converted edges
 */
def a(vertices: RDD[(Long, Row)], edges: RDD[(Long, Long)]): Graph[Row, Unit] =
  Graph(vertices, edges.map { case (src, dst) => Edge(src, dst, ()) })

// Run connected components on the graph
	<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
	<!-- Created by htmlize-1.51 in inline-css mode. -->
	<html>
	<head>
	<title>test.py</title>
	</head>
	<body style="color: #DCDCCC; background-color: #3F3F3F;">
	<pre>
	<span style="color: #F0DFAF; font-weight: bold;">def</span> <span style="color: #93E0E3;">test1</span>(<span style="color: #fff099;">a</span>, <span style="color: #c2b6e1;">b</span>):
	<span style="color: #F0DFAF; font-weight: bold;">return</span> <span style="color: #fff099;">a</span> + <span style="color: #c2b6e1;">b</span>
	# Install the juju command-line client via Homebrew.
	brew install juju
	# Run juju's config generation step before bootstrapping.
	juju generate-config
	# Export AWS credentials into the environment for the commands below.
	# NOTE(review): the `...` values look like placeholders to be replaced with real keys.
	export AWS_ACCESS_KEY_ID=...
	export AWS_SECRET_ACCESS_KEY=...
	# Bootstrap, then query status.
	juju bootstrap
	juju status

	# Deploy juju-gui with placement `--to 0` (presumably machine 0 — confirm), then expose it.
	juju deploy juju-gui --to 0
	juju expose juju-gui
	# Wait until `juju status` shows it has started
	val rdd = sc.parallelize((0 until 1000).map(x => (1, x)) ++ List((2,1), (2,2)))
	// rdd: org.apache.spark.rdd.RDD[(Int, Int)]
	rdd.collect
	// res1: Array[(Int, Int)] = Array((1,0), (1,1), (1,2), (1,3), (1,4), (1,5), (1,6), (1,7), (1,8), (1,9), (1,10), (1,11), (1,12), (1,13), (1,14), (1,15), (1,16), (1,17), (1,18), (1,19), (1,20), (1,21), (1,22), (1,23), (1,24), (1,25), (1,26), (1,27), (1,28), (1,29), (1,30), (1,31), (1,32), (1,33), (1,34), (1,35), (1,36), (1,37), (1,38), (1,39), (1,40), (1,41), (1,42), (1,43), (1,44), (1,45), (1,46), (1,47), (1,48), (1,49), (1,50), (1,51), (1,52), (1,53), (1,54), (1,55), (1,56), (1,57), (1,58), (1,59), (1,60), (1,61), (1,62), (1,63), (1,64), (1,65), (1,66), (1,67), (1,68), (1,69), (1,70), (1,71), (1,72), (1,73), (1,74), (1,75), (1,76), (1,77), (1,78), (1,79), (1,80), (1,81), (1,82), (1,83), (1,84), (1,85), (1,86), (1,87), (1,88), (1,89), (1,90), (1,91), (1,92), (1,93), (1,94), (1,95), (1,96),...
	val nestedGroups = rdd.groupBy(kv => (kv._1, kv._2 % 10)).groupBy(_._1._1).map(_._2
	import org.apache.spark.graphx._

	/**
	* Returns the shortest directed-edge path from src to dst in the graph. If no path exists, returns
	* the empty list.
	*/
	def bfs[VD, ED](graph: Graph[VD, ED], src: VertexId, dst: VertexId): Seq[VertexId] = {
	if (src == dst) return List(src)

	// The attribute of each vertex is (dist from src, id of vertex with dist-1)
	/**
	 * Minimal generic holder for a single value of type {@code T}.
	 */
	public class A<T> {
	    /** The held value; publicly readable and writable. */
	    public T t;

	    /**
	     * Creates a holder around the given value.
	     *
	     * @param t value stored in the public field {@link #t}
	     */
	    public A(T t) {
	        this.t = t;
	    }

	    /**
	     * Writes one line to standard output: the simple name of the held value's runtime class,
	     * a single space, and then the value itself.
	     */
	    public void print() {
	        final String typeName = t.getClass().getSimpleName();
	        System.out.println(typeName + " " + t);
	    }
	}
	import org.apache.spark.SparkConf
	import org.apache.spark.SparkContext
	import org.apache.spark.sql.SQLContext
	import org.apache.spark.sql.DataFrame

	val conf = new SparkConf()
	val sc = new SparkContext("local", "test")
	val sqlContext = new SQLContext(sc)

	val v = sqlContext.createDataFrame(List(
	// Project name.
	name := "my-project"

	// Project version.
	version := "0.1-SNAPSHOT"

	// Organization identifier for the project.
	organization := "com.example"

	// Scala version for the build.
	scalaVersion := "2.10.4"

	// Adds spark-core 1.2.1 to the dependencies; `%%` selects the artifact built for this Scala binary version.
	libraryDependencies += "org.apache.spark" %% "spark-core" % "1.2.1"
	import org.apache.spark.rdd.RDD
	import org.apache.spark.graphx._
	import scala.reflect.ClassTag

	/** Returns the subgraph of `graph` containing only `vertices` and their neighbors. */
	def subgraphWithNeighbors[VD, ED: ClassTag, A: ClassTag](
	graph: Graph[VD, ED], vertices: RDD[(VertexId, A)]): Graph[VD, ED] = {
	// Label each vertex in graph with true if it is a member of `vertices` and false if not
	val labeledGraph = graph.outerJoinVertices(vertices) {
	(id, oldAttr, isSampled) => isSampled.nonEmpty
	// Depends on AllPairsShortestPaths: https://github.com/apache/spark/pull/3619

	import org.apache.spark.graphx._
	import org.apache.spark.graphx.lib._

	// Ten edges i -> i + 1 for i in 0 until 10, each with edge attribute 1.
	val edges = sc.parallelize(for (i <- 0 until 10) yield Edge(i, i + 1, 1))
	// Build the graph from the edges alone, using 1 as the default vertex attribute.
	val graph = Graph.fromEdges(edges, 1)

	// Run all-pairs shortest paths and cache the result.
	val dists = AllPairsShortestPaths.run(graph).cache()
	// For each key, keep the largest second component found among its values.
	val maxDists = dists.mapValues(entry => entry._2).reduceByKey {
	  (left, right) => if (left > right) left else right
	}
	import org.apache.spark.graphx._
	import org.apache.spark.rdd.RDD
	import org.apache.spark.sql.catalyst.expressions.Row
	import scala.reflect.ClassTag

	/**
	 * Takes vertices and edge pairs and builds a graph.
	 *
	 * Every `(src, dst)` pair in `edges` becomes `Edge(src, dst, ())`. The attribute is written as
	 * the unit value `()` because `Unit` on its own names the companion object, not a value of type
	 * `Unit`.
	 *
	 * @param vertices (vertex id, row attribute) pairs
	 * @param edges (source id, destination id) pairs
	 * @return the graph constructed from `vertices` and the converted edges
	 */
	def a(vertices: RDD[(Long, Row)], edges: RDD[(Long, Long)]): Graph[Row, Unit] =
	  Graph(vertices, edges.map { case (src, dst) => Edge(src, dst, ()) })

	// Run connected components on the graph