tmcgrath/Cassandra Join

## Cassandra Join
//If you want to run a local cluster
//start-master.sh
//start-slave.sh <your-master-url>

// launch spark-shell and load the Cassandra connector package, OR
~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --packages datastax:spark-cassandra-connector:1.6.0-s_2.10

// launch spark-shell against the Spark cluster and load the Cassandra connector package
~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --master <your-master-url> --packages datastax:spark-cassandra-connector:1.6.0-s_2.10


// tell the Spark catalogue about Cassandra
// More info: internally there is a catalogue in the Spark session / Spark SQL, similar to
// the Hive metastore, with entries for databases and tables

// Register the temporary table `precipitation`, backed by the Cassandra data
// source and pointing at isd_weather_data.daily_aggregate_precip.
// Replace <your-cluster> with your own cluster name before running.
// stripMargin removes each line's leading whitespace and `|` marker, so the
// indentation below never reaches the SQL parser.
val precipitationDdl =
  """CREATE TEMPORARY TABLE precipitation
    |USING org.apache.spark.sql.cassandra
    |OPTIONS (
    |  keyspace "isd_weather_data",
    |  table "daily_aggregate_precip",
    |  cluster "<your-cluster>",
    |  pushdown "true"
    |)""".stripMargin
sqlContext.sql(precipitationDdl)

// With DSE this is generated automatically inside the internal CassandraHiveMetastore,
// so when you run with DSE the table is located automatically
 // Register the temporary table `station`, backed by the Cassandra data
 // source and pointing at isd_weather_data.weather_station.
 // Replace <your-cluster> with your own cluster name before running.
 // stripMargin removes each line's leading whitespace and `|` marker, so the
 // indentation below never reaches the SQL parser.
 val stationDdl =
   """CREATE TEMPORARY TABLE station
     |USING org.apache.spark.sql.cassandra
     |OPTIONS (
     |  keyspace "isd_weather_data",
     |  table "weather_station",
     |  cluster "<your-cluster>",
     |  pushdown "true"
     |)""".stripMargin
 sqlContext.sql(stationDdl)

// Join: every precipitation column plus the station's country_code, matching
// precipitation.wsid to station.id. Both temporary tables must already be
// registered under the names `precipitation` and `station`.
val joinQuery = "select p.*, s.country_code from precipitation p join station s on p.wsid = s.id"
sqlContext.sql(joinQuery)
	//If you want to run a local cluster
	//start-master.sh
	//start-slave.sh <your-master-url>

	// launch spark-shell and load the Cassandra connector package, OR
	~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --packages datastax:spark-cassandra-connector:1.6.0-s_2.10

	// launch spark-shell against the Spark cluster and load the Cassandra connector package
	~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --master <your-master-url> --packages datastax:spark-cassandra-connector:1.6.0-s_2.10


	// tell the Spark catalogue about Cassandra
	// More info: internally there is a catalogue in the Spark session / Spark SQL, similar to
	// the Hive metastore, with entries for databases and tables

	// Register the temporary table `precipitation`, backed by the Cassandra data
	// source and pointing at isd_weather_data.daily_aggregate_precip.
	// Replace <your-cluster> with your own cluster name before running.
	// Each continuation line must begin with a bare `|`: stripMargin only strips
	// leading whitespace followed by `|`, so an escaped `\|` would be left in the
	// SQL text and break parsing.
	sqlContext.sql(
	"""CREATE TEMPORARY TABLE precipitation
	|USING org.apache.spark.sql.cassandra
	|OPTIONS (
	|  keyspace "isd_weather_data",
	|  table "daily_aggregate_precip",
	|  cluster "<your-cluster>",
	|  pushdown "true"
	|)""".stripMargin)

	// With DSE this is generated automatically inside the internal CassandraHiveMetastore,
	// so when you run with DSE the table is located automatically
	// Register the temporary table `station`, backed by the Cassandra data
	// source and pointing at isd_weather_data.weather_station.
	// Replace <your-cluster> with your own cluster name before running.
	// Each continuation line must begin with a bare `|`: stripMargin only strips
	// leading whitespace followed by `|`, so an escaped `\|` would be left in the
	// SQL text and break parsing.
	sqlContext.sql(
	"""CREATE TEMPORARY TABLE station
	|USING org.apache.spark.sql.cassandra
	|OPTIONS (
	|  keyspace "isd_weather_data",
	|  table "weather_station",
	|  cluster "<your-cluster>",
	|  pushdown "true"
	|)""".stripMargin)

	// Join: every precipitation column plus the station's country_code, matching
	// precipitation.wsid to station.id. Both temporary tables must already be
	// registered under the names `precipitation` and `station`.
	val joinQuery = "select p.*, s.country_code from precipitation p join station s on p.wsid = s.id"
	sqlContext.sql(joinQuery)