Skip to content

Instantly share code, notes, and snippets.

@MarioAriasC
Last active June 8, 2020 03:35
Show Gist options
  • Save MarioAriasC/020b166bdc896ad95a8b to your computer and use it in GitHub Desktop.
Save MarioAriasC/020b166bdc896ad95a8b to your computer and use it in GitHub Desktop.
SQL queries on Kotlin data class with Apache Spark
package org.cakesolutions.spark.sql
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Row
import org.apache.spark.sql.SQLContext
import org.cakesolutions.spark.awaitEnter
import java.io.Serializable
/**
 * Demo entry point: builds a small in-memory data set of [HappyPerson] beans,
 * exposes it to Spark SQL as the temporary table `happy_people`, and prints the
 * name of every person whose favourite beverage contains "Coffee".
 *
 * The whole body runs inside [awaitEnter], which is invoked with this lambda.
 *
 * NOTE(review): the Spark context is never stopped here; presumably it is left
 * running on purpose so the application can be inspected until the program is
 * dismissed - confirm before adding an explicit stop.
 *
 * @param args command-line arguments (unused).
 */
fun main(args: Array<String>) = awaitEnter {
    val conf = SparkConf().setMaster("local").setAppName("SQL")
    val sc = JavaSparkContext(conf)
    val sqlCtx = SQLContext(sc)

    val happyPeopleRDD = sc.parallelize(listOf(
            HappyPerson("Mario", "Colombian Coffee"),
            HappyPerson("Freddy", "Cheap Coffee"),
            HappyPerson("Ben", "Tea"),
            HappyPerson("Vicente", "Tequila"),
            HappyPerson("Hideo", "Sake")
    ))

    // createDataFrame replaces the deprecated applySchema(JavaRDD, Class) overload,
    // and the class literal replaces the removed javaClass<T>() helper.
    val happyPeopleDF = sqlCtx.createDataFrame(happyPeopleRDD, HappyPerson::class.java)

    // `with` makes the DataFrame the implicit receiver of the calls below
    // (similar to Visual Basic's `With` statement).
    with(happyPeopleDF) {
        registerTempTable("happy_people")
        cache()
        // `this` is happyPeopleDF
        info(this)
    }

    val coffeeDrinkers = sqlCtx.sql("""
select name from happy_people where favouriteBeverage like '%Coffee%'
""")

    // Column 0 is the only selected column (`name`).
    coffeeDrinkers.collectAsList().forEach { row -> println(row.string[0]) }
}
/**
 * A person together with the drink that makes them happy.
 *
 * Both properties are mutable, nullable and default to `null`, so the class can
 * be created without arguments and exposes getters and setters for each field.
 * NOTE(review): presumably this bean-like shape is what lets Spark derive a
 * schema from the class - confirm before tightening to non-null `val`s.
 *
 * @property name the person's name.
 * @property favouriteBeverage the person's favourite beverage.
 */
data class HappyPerson(
        var name: String? = null,
        var favouriteBeverage: String? = null
) : Serializable
/**
 * Dumps diagnostic information about [df]: first calls `show()` on it and then
 * `printSchema()`.
 *
 * @param df the DataFrame to describe.
 */
private fun info(df: DataFrame) {
    with(df) {
        show()
        printSchema()
    }
}
//DSLy stuff like KotlinPrimavera
/**
 * Accessor token for string columns of this [Row]: the returned
 * [GetFieldToken] forwards the requested index to `getString`.
 * A new token is created on every property read.
 */
val Row.string: GetFieldToken<String>
    get() = GetFieldToken { index -> this.getString(index) }
/**
 * Small wrapper that turns an index-based extractor function into something
 * that can be used with index syntax (`token[i]`).
 *
 * @param T the type of value produced for an index.
 * @property extract function invoked with the requested index.
 */
class GetFieldToken<T>(val extract: (Int) -> T) {
    /**
     * Returns the value [extract] produces for [index].
     *
     * Marked `operator` so that `token[index]` compiles; explicit
     * `token.get(index)` calls keep working unchanged.
     */
    operator fun get(index: Int): T = extract(index)
}
/**
 * Runs [body], then blocks until a line is entered on standard input (or
 * standard input reaches end-of-file) and finally prints "Bye!".
 *
 * @param body the work to execute before waiting.
 */
fun awaitEnter(body: () -> Unit) {
    body()
    // readLine() needs no extra import and returns null at end-of-input instead
    // of throwing, so the program still finishes cleanly when stdin is closed.
    readLine()
    println("Bye!")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment