jmrr/mysql2parquet.scala

## mysql2parquet.scala
// Export a MySQL table to a Parquet file.
// Assumes `sc` is an already-created SparkContext (e.g. the one spark-shell provides).
val sqlContext = new org.apache.spark.sql.SQLContext(sc) // optional

// Read the table through the JDBC data source using the DataFrameReader API
// (`read`), the non-deprecated replacement for `sqlContext.load("jdbc", ...)`.
// The open parenthesis keeps the expression on one logical statement, so it
// can be pasted into the shell line by line.
// NOTE(review): in a MySQL JDBC URL the path segment after the host is normally
// the database (schema) name -- confirm what `<table>` should be replaced with.
val df = sqlContext.read.format("jdbc").options(Map(
          "url" -> "jdbc:mysql://<ip.address.your.db>/<table>?user=<username>&password=<pwd>",
          "dbtable" -> "<tablename>")).load()

// Save only the selected columns, via the same DataFrameWriter API (`write`)
// used for the all-columns variant below, instead of the deprecated
// `save(path, source)`.
df.select("<col1>", "<col2>", "<col3>").write.parquet("</path/to/parquet/file.parquet>")

// Alternatively, to save all the columns.
// Run only one of the two writes (or give them different paths): with the
// default save mode the second write fails if the target path already exists.
df.write.parquet("</path/to/parquet/file.parquet>")
	// Export a MySQL table to a Parquet file.
	// Assumes `sc` is an already-created SparkContext (e.g. the one spark-shell provides).
	val sqlContext = new org.apache.spark.sql.SQLContext(sc) // optional

	// Read the table through the JDBC data source using the DataFrameReader API
	// (`read`), the non-deprecated replacement for `sqlContext.load("jdbc", ...)`.
	// The open parenthesis keeps the expression on one logical statement, so it
	// can be pasted into the shell line by line.
	// NOTE(review): in a MySQL JDBC URL the path segment after the host is normally
	// the database (schema) name -- confirm what `<table>` should be replaced with.
	val df = sqlContext.read.format("jdbc").options(Map(
	"url" -> "jdbc:mysql://<ip.address.your.db>/<table>?user=<username>&password=<pwd>",
	"dbtable" -> "<tablename>")).load()

	// Save only the selected columns, via the same DataFrameWriter API (`write`)
	// used for the all-columns variant below, instead of the deprecated
	// `save(path, source)`.
	df.select("<col1>", "<col2>", "<col3>").write.parquet("</path/to/parquet/file.parquet>")

	// Alternatively, to save all the columns.
	// Run only one of the two writes (or give them different paths): with the
	// default save mode the second write fails if the target path already exists.
	df.write.parquet("</path/to/parquet/file.parquet>")