Spark AQE Settings 2023
# https://docs.databricks.com/en/optimizations/aqe.html#configuration
# disable
# spark.databricks.optimizer.adaptive.enabled false
# default
spark.databricks.optimizer.adaptive.enabled true
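# A minimal sketch (assuming an active PySpark session named `spark`, which this
# gist does not define) of toggling the same setting per-session instead of in
# the cluster Spark config:
#   spark.conf.set("spark.databricks.optimizer.adaptive.enabled", "true")
#   spark.conf.get("spark.databricks.optimizer.adaptive.enabled")  # returns 'true'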
# Default number of partitions to use when shuffling data for joins or aggregations.
# For small datasets, set a lower shuffle partition count.
# default
# spark.sql.shuffle.partitions 200
# 'auto' enables auto-optimized shuffle
spark.sql.shuffle.partitions auto
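# Per-session equivalent, as a sketch (again assuming a `spark` session;
# 64 below is an illustrative count for small inputs, not a recommendation):
#   spark.conf.set("spark.sql.shuffle.partitions", "auto")
#   spark.conf.set("spark.sql.shuffle.partitions", "64")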
# The maximum table size at which AQE switches a sort-merge join to a broadcast hash join without a hint.
# Increase this when the 'smaller' join side is larger and workers have more memory,
# as the table can then be broadcast to each worker for better performance.
# default
spark.databricks.adaptive.autoBroadcastJoinThreshold 30MB
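# To force a broadcast regardless of this threshold, a join hint still works.
# Sketch assuming two DataFrames `large_df` and `small_df` (hypothetical names):
#   from pyspark.sql.functions import broadcast
#   joined = large_df.join(broadcast(small_df), "id")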
# Enable or disable partition coalescing.
# Merges shuffle partitions into fewer partitions to reduce overhead when shuffling large datasets.
# Behind the scenes this behaves like repartition() or coalesce(), which can also be used
# to force a partition count manually (see the sketch below).
# default
spark.sql.adaptive.coalescePartitions.enabled true
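# Forcing a partition count manually, per the note above; a sketch assuming an
# existing DataFrame `df` (hypothetical name):
#   df = df.repartition(200)  # full shuffle into exactly 200 partitions
#   df = df.coalesce(16)      # merge down to 16 partitions without a full shuffle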
# The target size after coalescing.
# The coalesced partition sizes will be close to but no bigger than this target size.
# default
spark.sql.adaptive.advisoryPartitionSizeInBytes 64MB
# The minimum size of partitions after coalescing.
# The coalesced partition sizes will be no smaller than this size.
# default
spark.sql.adaptive.coalescePartitions.minPartitionSize 1MB
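# Sketch of tuning both size knobs together at runtime (assumes a `spark`
# session; 128m and 4m are illustrative values, not recommendations):
#   spark.conf.set("spark.sql.adaptive.advisoryPartitionSizeInBytes", "128m")
#   spark.conf.set("spark.sql.adaptive.coalescePartitions.minPartitionSize", "4m")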
# The minimum number of partitions after coalescing.
# Setting this explicitly is not recommended, because it overrides spark.sql.adaptive.coalescePartitions.minPartitionSize.
# default: 2x the number of cluster cores
# spark.sql.adaptive.coalescePartitions.minPartitionNum
# Dynamically handle skewed partitions in sort-merge joins.
# disable
# spark.sql.adaptive.skewJoin.enabled false
# default
spark.sql.adaptive.skewJoin.enabled true
# A factor that, when multiplied by the median partition size, contributes to determining whether a partition is skewed.
# default
spark.sql.adaptive.skewJoin.skewedPartitionFactor 5
# A threshold that contributes to determining whether a partition is skewed.
# default
spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes 256MB
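# Worked example of the skew test: in open-source Spark a partition counts as
# skewed when it exceeds max(factor * median, threshold). With a median
# partition size of 40MB, factor 5 gives 200MB, so the 256MB threshold wins:
# a 300MB partition is split, while a 220MB one is left alone.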
# Dynamically detect and propagate empty relations
spark.databricks.adaptive.emptyRelationPropagation.enabled true
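# Sketch of the effect: if one side of an inner join turns out empty at runtime
# (e.g. df.join(empty_df, "id") with hypothetical DataFrames), AQE can replace
# the join with an empty relation instead of executing it.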