Last active
August 11, 2023 17:02
-
-
Save animeshtrivedi/03917931208bad19ab6c2e62ecb33ce5 to your computer and use it in GitHub Desktop.
A Spark configuration with some performance-tuning knobs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Spark performance-tuning knobs in spark-defaults.conf format:
# one "key value" pair per line; lines starting with '#' are comments.
# Overall theme of the values below: compression is switched off, locality
# waits are zeroed, and buffer sizes / I/O thread counts are set explicitly.
# NOTE(review): the commented-out classpath entries reference Crail jars, so
# these values were presumably tuned for one specific cluster -- re-validate
# them against your own hardware and Spark version before reuse.

# RDD settings
# Do not compress serialized RDD partitions.
spark.rdd.compress false

# Shuffle settings
spark.shuffle.manager sort
# No compression of shuffle output files or of spill files.
spark.shuffle.compress false
# NOTE(review): SparkConf lists spark.shuffle.spill as deprecated since 1.6
# ("not used anymore"); on newer versions this line likely only triggers a
# warning -- confirm and drop if so.
spark.shuffle.spill false
spark.shuffle.spill.compress false
# 4194304 = 4 * 1024 * 1024
spark.shuffle.sort.initialBufferSize 4194304
spark.shuffle.sort.bypassMergeThreshold 200
spark.shuffle.file.buffer 1m
# Thread counts for the shuffle transport module.
spark.shuffle.io.serverThreads 16
spark.shuffle.io.clientThreads 16
spark.shuffle.io.threads 16

# Locality/scheduling related settings
# A wait of 0 at every level means tasks are not held back waiting for a
# more local slot.
spark.locality.wait 0
spark.locality.wait.node 0
spark.locality.wait.process 0
spark.locality.wait.rack 0

# Reducer settings
# NOTE(review): in Spark's size-string syntax the suffix 'p' denotes
# pebibytes, so this asks for 1024 PiB of in-flight fetch data (effectively
# unbounded). If 1024 MiB was intended, the value should be 1024m -- confirm
# the intent before changing.
spark.reducer.maxSizeInFlight 1024p
#spark.reducer.maxReqsInFlight 1000000000

# RPC settings
spark.rpc.io.serverThreads 16
spark.rpc.io.clientThreads 16
spark.rpc.io.threads 16

# File io settings
spark.files.io.serverThreads 16
spark.files.io.clientThreads 16
spark.files.io.threads 16

# Broadcast settings
# Broadcast variables are sent uncompressed and without checksum verification.
spark.broadcast.compress false
spark.broadcast.checksum false

# Kryo specific settings
spark.serializer org.apache.spark.serializer.KryoSerializer
# Initial and maximum size of the Kryo serialization buffer.
spark.kryoserializer.buffer 1mb
spark.kryoserializer.buffer.max 128mb

# Driver settings
spark.driver.maxResultSize 128m
spark.driver.extraJavaOptions -Dsun.nio.PageAlignDirectMemory=true
# '???' below is a placeholder for the user's home directory name.
#spark.driver.extraClassPath /home/???/crail/jars/*:/home/???/spark/extra-jars/*:.

# Executor settings
# Same JVM flag as the driver, so both sides agree on direct-memory alignment.
spark.executor.extraJavaOptions -Dsun.nio.PageAlignDirectMemory=true
#spark.executor.extraClassPath /home/???/crail/jars/*:/home/???/spark/extra-jars/*:.

# SQL settings
spark.sql.crossJoin.enabled true
spark.sql.shuffle.partitions 200
spark.sql.files.openCostInBytes 0
# 1280255769 bytes is roughly 1.19 GiB per file-scan partition.
spark.sql.files.maxPartitionBytes 1280255769
spark.sql.parquet.compression.codec uncompressed
spark.sql.codegen.wholeStage true

# Spark webserver specific settings
# Disabled by default; '????' is a placeholder for the HDFS namenode host.
#spark.eventLog.enabled true
#spark.eventLog.dir hdfs://????:9000/shared-spark-logs
#for webserver
#spark.history.provider org.apache.spark.deploy.history.FsHistoryProvider
#spark.history.fs.logDirectory hdfs://????:9000/shared-spark-logs

# Failure settings
# Fail fast: a single executor failure or application attempt is the limit.
spark.yarn.max.executor.failures 1
spark.yarn.maxAppAttempts 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can you explain what `spark.driver.rpc.io.serverThreads` does?