Sathiyarajan/Cassandra Spark SQL

## Cassandra Spark SQL
//If you want to run a local cluster
//start-master.sh
//start-slave.sh <your-master-url>

// start spark-shell locally and load the Cassandra connector jar, OR
~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --packages datastax:spark-cassandra-connector:1.6.0-s_2.10

// start spark-shell against the Spark cluster and load the Cassandra connector jar
~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --master <your-master-url> --packages datastax:spark-cassandra-connector:1.6.0-s_2.10


// tell the Spark catalogue about Cassandra
// More info: internally there is a catalogue in the Spark session / Spark SQL, similar to
// the Hive metastore, with entries for databases and tables

// DDL that registers the Cassandra table as the temporary table `precipitation`.
// Only the continuation lines carry a `|` margin, which stripMargin removes.
val createPrecipitationTable =
   """CREATE TEMPORARY TABLE precipitation
     |USING org.apache.spark.sql.cassandra
     |OPTIONS (
     |  keyspace "isd_weather_data",
     |  table "daily_aggregate_precip",
     |  cluster "<your-cluster>",
     |  pushdown "true"
     |)""".stripMargin

// Run the DDL through the shell's SQL context.
sqlContext.sql(createPrecipitationTable)

// With DSE this is generated automatically inside the internal CassandraHiveMetastore,
// so when you run with DSE the table is located automatically

// prepare dataframe from sql
val df = sqlContext.sql("select * from precipitation")

// try a few DataFrame calls; tab completion works on `df.` in the shell
df.show()
df.orderBy("day")
df. <hit tab>
// etc
	//If you want to run a local cluster
	//start-master.sh
	//start-slave.sh <your-master-url>

	// start spark-shell locally and load the Cassandra connector jar, OR
	~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --packages datastax:spark-cassandra-connector:1.6.0-s_2.10

	// start spark-shell against the Spark cluster and load the Cassandra connector jar
	~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --master <your-master-url> --packages datastax:spark-cassandra-connector:1.6.0-s_2.10


	// tell the Spark catalogue about Cassandra
	// More info: internally there is a catalogue in the Spark session / Spark SQL, similar to
	// the Hive metastore, with entries for databases and tables

	// Register the Cassandra table as the temporary table `precipitation`.
	// The margin character must be a bare `|`: stripMargin only strips leading
	// whitespace followed by `|`, so an escaped `\|` would be left in the SQL text
	// (together with the indentation) and the statement would not parse.
	sqlContext.sql(
	"""CREATE TEMPORARY TABLE precipitation
	|USING org.apache.spark.sql.cassandra
	|OPTIONS (
	| keyspace "isd_weather_data",
	| table "daily_aggregate_precip",
	| cluster "<your-cluster>",
	| pushdown "true"
	|)""".stripMargin)

	// With DSE this is generated automatically inside the internal CassandraHiveMetastore,
	// so when you run with DSE the table is located automatically

	// prepare dataframe from sql
	val df = sqlContext.sql("select * from precipitation")

	// try a few DataFrame calls; tab completion works on `df.` in the shell
	df.show()
	df.orderBy("day")
	df. <hit tab>
	// etc