Skip to content

Instantly share code, notes, and snippets.

@tzachz
Created November 4, 2015 19:47
Show Gist options
  • Save tzachz/a7ad56767f961c289cb2 to your computer and use it in GitHub Desktop.
Save tzachz/a7ad56767f961c289cb2 to your computer and use it in GitHub Desktop.
Spark REST API usage example: shuffle memory totals
import org.json4s._
import org.json4s.jackson.JsonMethods.parse
import scala.io.Source.fromURL
/**
 * Small command-line example of consuming the Spark REST API: downloads the
 * stage list of an application as JSON and prints the stage count together
 * with the totals of shuffle-write bytes and memory/disk spill bytes.
 */
object SparkAppStats {

  /**
   * (Partial) representation of a Spark Stage object: only the fields that
   * [[main]] reports on are declared here.
   *
   * @param name               stage name
   * @param shuffleWriteBytes  value of the stage's `shuffleWriteBytes` JSON field
   * @param memoryBytesSpilled value of the stage's `memoryBytesSpilled` JSON field
   * @param diskBytesSpilled   value of the stage's `diskBytesSpilled` JSON field
   */
  case class SparkStage(name: String, shuffleWriteBytes: Long, memoryBytesSpilled: Long, diskBytesSpilled: Long)

  /** json4s formats picked up implicitly by `extract` in [[main]]. */
  implicit val formats: Formats = DefaultFormats

  /**
   * Stages endpoint of the application to inspect. The `<host>` and
   * `<app-name>` placeholders must be replaced with real values before running.
   */
  val url = "http://<host>:4040/api/v1/applications/<app-name>/stages"

  /**
   * Fetches the JSON document at [[url]], extracts it into a list of
   * [[SparkStage]] and prints the stage count and per-metric totals.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Read the response as UTF-8 instead of the platform-default charset, and
    // always close the underlying stream, even if reading fails.
    val source = fromURL(url, "UTF-8")
    val json =
      try source.mkString
      finally source.close()

    val stages: List[SparkStage] = parse(json).extract[List[SparkStage]]

    println(s"stages count: ${stages.size}")
    println(s"shuffleWriteBytes: ${stages.map(_.shuffleWriteBytes).sum}")
    println(s"memoryBytesSpilled: ${stages.map(_.memoryBytesSpilled).sum}")
    println(s"diskBytesSpilled: ${stages.map(_.diskBytesSpilled).sum}")
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment