tmcgrath/Spark SQL with Scala using mySQL (JDBC) data source

## Spark SQL with Scala using mySQL (JDBC) data source
todd-mcgraths-macbook-pro:spark-1.4.1-bin-hadoop2.4 toddmcgrath$ bin/spark-shell --jars mysql-connector-java-5.1.38-bin.jar
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 1.4.1
      /_/

Using Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_66)
Type in expressions to have them evaluated.
Type :help for more information.
Spark context available as sc.
SQL context available as sqlContext.

scala> val dataframe_mysql = sqlContext.read.format("jdbc").option("url", "jdbc:mysql://localhost/sparksql").option("driver", "com.mysql.jdbc.Driver").option("dbtable", "baby_names").option("user", "root").option("password", "root").load()
dataframe_mysql: org.apache.spark.sql.DataFrame = [id: bigint, year: int, first_name: string, county: string, sex: string, count: int]

scala> dataframe_mysql.show
+--+----+----------+-----------+---+-----+
|id|year|first_name|     county|sex|count|
+--+----+----------+-----------+---+-----+
| 1|2012|   DOMINIC|     CAYUGA|  M|    6|
| 2|2012|   ADDISON|   ONONDAGA|  F|   14|
| 3|2012|     JULIA|   ONONDAGA|  F|   15|
| 4|2012|   WILLIAM|WESTCHESTER|  M|   41|
| 5|2012|    AMELIA|   ONONDAGA|  F|   15|
| 6|2012|      LILY|   ONONDAGA|  F|   15|
| 7|2012|     AIDAN|WESTCHESTER|  M|   16|
| 8|2012|    STEVEN|WESTCHESTER|  M|   16|
| 9|2012|       MAX|WESTCHESTER|  M|   16|
|10|2012|  BENJAMIN|WESTCHESTER|  M|   40|
|11|2012|    JUSTIN|WESTCHESTER|  M|   39|
|12|2012|   ABIGAIL|   ONONDAGA|  F|   15|
|13|2012| SEBASTIAN|WESTCHESTER|  M|   39|
|14|2012|   BRANDON|WESTCHESTER|  M|   38|
|15|2012|    JOSHUA|    SUFFOLK|  M|   56|
|16|2012|       MIA|   ONONDAGA|  F|   16|
|17|2012|     EMILY|   ONONDAGA|  F|   16|
|18|2012|     GRACE|   ONONDAGA|  F|   17|
|19|2012|     JASON|    SUFFOLK|  M|   55|
|20|2012|    THOMAS|    SUFFOLK|  M|   55|
+--+----+----------+-----------+---+-----+


scala> dataframe_mysql.registerTempTable("names")

scala> dataframe_mysql.sqlContext.sql("select * from names limit 10").collect.foreach(println)
[1,2012,DOMINIC,CAYUGA,M,6]
[2,2012,ADDISON,ONONDAGA,F,14]
[3,2012,JULIA,ONONDAGA,F,15]
[4,2012,WILLIAM,WESTCHESTER,M,41]
[5,2012,AMELIA,ONONDAGA,F,15]
[6,2012,LILY,ONONDAGA,F,15]
[7,2012,AIDAN,WESTCHESTER,M,16]
[8,2012,STEVEN,WESTCHESTER,M,16]
[9,2012,MAX,WESTCHESTER,M,16]
[10,2012,BENJAMIN,WESTCHESTER,M,40]
	todd-mcgraths-macbook-pro:spark-1.4.1-bin-hadoop2.4 toddmcgrath$ bin/spark-shell --jars mysql-connector-java-5.1.38-bin.jar
	Welcome to
	____ __
	/ __/__ ___ _____/ /__
	_\ \/ _ \/ _ `/ __/ '_/
	/___/ .__/\_,_/_/ /_/\_\ version 1.4.1
	/_/

	Using Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_66)
	Type in expressions to have them evaluated.
	Type :help for more information.
	Spark context available as sc.
	SQL context available as sqlContext.

	scala> val dataframe_mysql = sqlContext.read.format("jdbc").option("url", "jdbc:mysql://localhost/sparksql").option("driver", "com.mysql.jdbc.Driver").option("dbtable", "baby_names").option("user", "root").option("password", "root").load()
	dataframe_mysql: org.apache.spark.sql.DataFrame = [id: bigint, year: int, first_name: string, county: string, sex: string, count: int]

	scala> dataframe_mysql.show
	+--+----+----------+-----------+---+-----+
	\|id\|year\|first_name\| county\|sex\|count\|
	+--+----+----------+-----------+---+-----+
	\| 1\|2012\| DOMINIC\| CAYUGA\| M\| 6\|
	\| 2\|2012\| ADDISON\| ONONDAGA\| F\| 14\|
	\| 3\|2012\| JULIA\| ONONDAGA\| F\| 15\|
	\| 4\|2012\| WILLIAM\|WESTCHESTER\| M\| 41\|
	\| 5\|2012\| AMELIA\| ONONDAGA\| F\| 15\|
	\| 6\|2012\| LILY\| ONONDAGA\| F\| 15\|
	\| 7\|2012\| AIDAN\|WESTCHESTER\| M\| 16\|
	\| 8\|2012\| STEVEN\|WESTCHESTER\| M\| 16\|
	\| 9\|2012\| MAX\|WESTCHESTER\| M\| 16\|
	\|10\|2012\| BENJAMIN\|WESTCHESTER\| M\| 40\|
	\|11\|2012\| JUSTIN\|WESTCHESTER\| M\| 39\|
	\|12\|2012\| ABIGAIL\| ONONDAGA\| F\| 15\|
	\|13\|2012\| SEBASTIAN\|WESTCHESTER\| M\| 39\|
	\|14\|2012\| BRANDON\|WESTCHESTER\| M\| 38\|
	\|15\|2012\| JOSHUA\| SUFFOLK\| M\| 56\|
	\|16\|2012\| MIA\| ONONDAGA\| F\| 16\|
	\|17\|2012\| EMILY\| ONONDAGA\| F\| 16\|
	\|18\|2012\| GRACE\| ONONDAGA\| F\| 17\|
	\|19\|2012\| JASON\| SUFFOLK\| M\| 55\|
	\|20\|2012\| THOMAS\| SUFFOLK\| M\| 55\|
	+--+----+----------+-----------+---+-----+


	scala> dataframe_mysql.registerTempTable("names")

	scala> dataframe_mysql.sqlContext.sql("select * from names limit 10").collect.foreach(println)
	[1,2012,DOMINIC,CAYUGA,M,6]
	[2,2012,ADDISON,ONONDAGA,F,14]
	[3,2012,JULIA,ONONDAGA,F,15]
	[4,2012,WILLIAM,WESTCHESTER,M,41]
	[5,2012,AMELIA,ONONDAGA,F,15]
	[6,2012,LILY,ONONDAGA,F,15]
	[7,2012,AIDAN,WESTCHESTER,M,16]
	[8,2012,STEVEN,WESTCHESTER,M,16]
	[9,2012,MAX,WESTCHESTER,M,16]
	[10,2012,BENJAMIN,WESTCHESTER,M,40]