Skip to content

Instantly share code, notes, and snippets.

@pphetra
Last active March 7, 2020 04:58
Show Gist options
  • Save pphetra/7bb3ba7a4407c6cda19ac27bbe9305c6 to your computer and use it in GitHub Desktop.
Save pphetra/7bb3ba7a4407c6cda19ac27bbe9305c6 to your computer and use it in GitHub Desktop.
# PySpark variant: import Spark's column aggregate `max`
# (note: this shadows Python's built-in `max` in this scope).
from pyspark.sql.functions import max
# Aggregate the maximum of the "Deaths" column and fetch at most one result row.
covid060320.select(max("Deaths")).take(1)
// Register the DataFrame as a temporary view so it can be queried with Spark SQL.
covid060320.createOrReplaceTempView("covit06032020")
// Row count per country/region, expressed in SQL.
// The column name contains a '/', so it must be backtick-quoted: unquoted,
// Spark SQL parses it as the arithmetic expression `Country / Region`
// and fails to resolve those two (non-existent) columns.
val sqlWay = spark.sql("""
SELECT `Country/Region`, count(1)
FROM covit06032020
GROUP BY `Country/Region`
""")
// Row count per country/region via the DataFrame API: group on the
// "Country/Region" column, then count the rows in each group.
val dataFrameWay = covid060320.groupBy("Country/Region").count()
// SQL form: maximum of the Deaths column, read from the registered temp view;
// take(1) fetches the single aggregate row.
spark.sql("SELECT max(Deaths) from covit06032020").take(1)
// DataFrame form of the same aggregate, using the `max` column function.
import org.apache.spark.sql.functions.max
covid060320.select(max("Deaths")).take(1)
// in Scala
// Top five countries/regions ranked by the sum of the `count` column, in SQL.
// - `Country/Region` is backtick-quoted because of the '/' in its name;
//   unquoted it would be parsed as the division `Country / Region`.
// - The query reads from `covit06032020`, the temp view registered from
//   covid060320; the DataFrame's Scala variable name is not a SQL table.
// NOTE(review): assumes the data has a numeric `count` column — confirm against the schema.
val maxSql = spark.sql("""
SELECT `Country/Region`, sum(count) as country
FROM covit06032020
GROUP BY `Country/Region`
ORDER BY sum(count) DESC
LIMIT 5
""")
maxSql.show()
// in Scala
import org.apache.spark.sql.functions.desc
// Top five countries/regions by summed `count`, via the DataFrame API:
// group, sum, rename the generated "sum(count)" column to "country",
// order descending on it, keep five rows and print them.
covid060320
  .groupBy("Country/Region")
  .sum("count")
  .withColumnRenamed("sum(count)", "country")
  .orderBy(desc("country"))
  .limit(5)
  .show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment