Skip to content

Instantly share code, notes, and snippets.

@joshdevins
Created March 27, 2017 08:40
Show Gist options
  • Save joshdevins/7f8a70e8ee9606827e7d16d5767df1a0 to your computer and use it in GitHub Desktop.
Save joshdevins/7f8a70e8ee9606827e7d16d5767df1a0 to your computer and use it in GitHub Desktop.
Spark 1.4.1 shell script to tune memory settings
/**
* Determines the compute and memory layouts given a Spark 1.4.x configuration.
*
* Run this with the full configuration you intend to use, except for the
* executor settings: the calculation itself needs no executors, so requesting
* them would only waste resources. E.g.:
*
spark-shell \
--master "local" \
--deploy-mode "client" \
--driver-memory 1G \
--num-executors 0 \
--conf "spark.storage.memoryFraction=0.9" \
-i sparkTuning.scala
*/
// TODO(jd): incorporate `spark.task.cpus`
/**
 * Prints the compute and memory layout implied by an executor configuration,
 * using the memory fractions found in the active SparkConf (`sc.getConf`).
 *
 * Memory figures are printed in GB, except working memory per core, which is
 * printed in MB (GB value * 1024).
 *
 * @param numExecutors        number of executors
 * @param numCoresPerExecutor number of cores per executor
 * @param executorMemory      memory per executor (printed as GB)
 */
def calculate(numExecutors: Int, numCoresPerExecutor: Int, executorMemory: Double): Unit = {
  val conf = sc.getConf

  // Fractions, with the same fallbacks Spark 1.4.x uses when a key is unset.
  val storageSafetyFraction = conf.getDouble("spark.storage.safetyFraction", 0.9)
  val storageMemoryFraction = conf.getDouble("spark.storage.memoryFraction", 0.6)
  val unrollMemoryFraction = conf.getDouble("spark.storage.unrollFraction", 0.2)
  val shuffleMemoryFraction = conf.getDouble("spark.shuffle.memoryFraction", 0.2)
  // The shuffle region has its own safety fraction (default 0.8); it is not
  // governed by spark.storage.safetyFraction.
  val shuffleSafetyFraction = conf.getDouble("spark.shuffle.safetyFraction", 0.8)

  // Per-executor regions.
  val safetyMemory = executorMemory * storageSafetyFraction
  val storageMemory = safetyMemory * storageMemoryFraction
  val unrollMemory = storageMemory * unrollMemoryFraction
  val shuffleMemory = executorMemory * shuffleSafetyFraction * shuffleMemoryFraction

  // Storage memory that remains once the unroll portion is set aside.
  val usableStorageMemory = storageMemory - unrollMemory
  // Working memory: what is left of the safety-scaled executor memory after
  // the storage and shuffle regions. Negative when the fractions over-commit.
  val workingMemory = safetyMemory - shuffleMemory - storageMemory
  val workingMemoryPerCore = workingMemory / numCoresPerExecutor
  val workingMemoryPerCoreMB = workingMemoryPerCore * 1024

  // Cluster-wide totals.
  val totalCores = numExecutors * numCoresPerExecutor
  val totalMemory = numExecutors * executorMemory
  val totalUsableStorageMemory = numExecutors * usableStorageMemory

  println("Compute layout:")
  println(s" total cores: $totalCores")
  println("Memory layout:")
  println(f" total memory: $totalMemory%1.2f GB")
  println(f" total usable storage memory: $totalUsableStorageMemory%1.2f GB")
  println(f" usable storage memory per executor: $usableStorageMemory%1.2f GB")
  println(f" working memory per executor: $workingMemory%1.2f GB")
  println(f" working memory per core: $workingMemoryPerCoreMB%1.2f MB")
}
// Example run: 256 executors with 6 cores each and executorMemory = 24.0
// (calculate prints memory figures in GB). Adjust these to the layout being evaluated.
calculate(numExecutors = 256, numCoresPerExecutor = 6, executorMemory = 24.0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment