# sergey-dryabzhinsky/sysctl-proxmox-tune.conf

## sysctl-proxmox-tune.conf
###
# Cheap kernel-parameter tuning and hardening for Proxmox or other servers.
# Try it if the server is under heavy load - network, memory or disk.
# No harm is expected, but keep your eyes open.
#
# @updated: 2020-02-06 - more params used, adjust some params values, more comments on params
#

### NETWORK ###

# Time out half-closed connections faster: seconds an orphaned socket may wait
# in FIN_WAIT_2 for the peer's FIN before the kernel drops it.
net.ipv4.tcp_fin_timeout = 10

# Connection tracking: seconds a tracked TCP flow may stay in the FIN_WAIT
# state before its conntrack entry expires.
# NOTE(review): this was previously described as a TIME_WAIT timer
# ("5 * 2 = 10 seconds"); the key name refers to FIN_WAIT - confirm the intent.
# The key only exists while the nf_conntrack module is loaded - load it if needed.
net.netfilter.nf_conntrack_tcp_timeout_fin_wait = 5

# Keepalive optimizations
# By default the keepalive routines wait for two hours (7200 secs) of idle time
# before sending the first keepalive probe, and then resend it every 75 seconds.
# If no ACK is received for 9 consecutive probes, the connection is marked as broken.
# The default values are: tcp_keepalive_time = 7200, tcp_keepalive_intvl = 75, tcp_keepalive_probes = 9
# The values below shorten that, so a dead peer is detected after roughly
# 600 + 5 * 15 = 675 seconds.
# Idle seconds before the first keepalive probe is sent (10 minutes)
net.ipv4.tcp_keepalive_time = 600
# Seconds between successive keepalive probes (reduced from 75 to 15)
net.ipv4.tcp_keepalive_intvl = 15
# Number of unanswered probes before the connection is dropped (reduced from 9 to 5)
net.ipv4.tcp_keepalive_probes = 5

# Upper bound for the listen() backlog: how many connections may queue up
# waiting for accept() on a listening socket (not a cap on active connections).
net.core.somaxconn = 256000

# Protection from SYN flood attacks.
net.ipv4.tcp_syncookies = 1
# Minimize the time it takes for a stalled connection to fail:
# retransmit an outgoing SYN at most twice,
net.ipv4.tcp_syn_retries = 2
# retransmit the SYN+ACK for an incoming connection at most twice,
net.ipv4.tcp_synack_retries = 2
# and retry at most twice on orphaned connections (closed locally, no owning process).
net.ipv4.tcp_orphan_retries = 2

# Queue length for half-open connections (SYN received, handshake not finished).
# Helps with SYN floods and large numbers of valid HTTPS connections.
net.ipv4.tcp_max_syn_backlog = 40000

# Increase the length of the network device input queue
net.core.netdev_max_backlog = 50000

# TCP congestion control algorithm.
# Author's rationale: reaches full speed faster than cubic
# and recovers faster when the connection loses packets.
net.ipv4.tcp_congestion_control = yeah
# Default queueing discipline for network interfaces.
# Background: http://lwn.net/Articles/616241/
net.core.default_qdisc = fq_codel

# Widen the ephemeral (local) port range: first port, last port
net.ipv4.ip_local_port_range = 10000 60000

# Keep reuse of TIME_WAIT sockets switched off.
# NOTE(review): the original remark here was only "Broken combined" - presumably
# tcp_tw_reuse and tcp_tw_recycle misbehave when enabled together; confirm.
net.ipv4.tcp_tw_reuse = 0
# net.ipv4.tcp_tw_recycle was removed from Linux 4.12 in 2017.
# Removed by the upstream kernel, absent since PVE 5.1.
# So comment the line below out if you use PVE 5.1+ (the key does not exist there).
# Marked as: PVE3
net.ipv4.tcp_tw_recycle = 0

# IPv6 is not needed for now, so it is disabled on all interfaces.
# If you use IPv6 - comment this line out.
net.ipv6.conf.all.disable_ipv6 = 1

# Raise the neighbour (ARP) table garbage-collection thresholds
# to avoid "neighbour table overflow".
# https://www.serveradminblog.com/2011/02/neighbour-table-overflow-sysctl-conf-tunning/
net.ipv4.neigh.default.gc_thresh1 = 1024
net.ipv4.neigh.default.gc_thresh2 = 2048
net.ipv4.neigh.default.gc_thresh3 = 4096

# Raise the limit on challenge ACKs.
# NOTE(review): presumably a mitigation for the challenge-ACK side channel
# described at the link below - confirm.
# http://www.opennet.ru/opennews/art.shtml?num=44945
net.ipv4.tcp_challenge_ack_limit = 9999

# Don't slow the network down - keep the congestion window after an idle period
# https://github.com/ton31337/tools/wiki/tcp_slow_start_after_idle---tcp_no_metrics_save-performance
net.ipv4.tcp_slow_start_after_idle = 0

# Prefer low latency over throughput, e.g. when packets must go out
# immediately or when there are many small packets.
# NOTE(review): newer kernels reportedly keep this key only for compatibility
# and ignore its value - confirm for your kernel version.
net.ipv4.tcp_low_latency = 1

#### PVE ####

# Allow a high number of TIME_WAIT sockets
net.ipv4.tcp_max_tw_buckets = 2000000

# PVE 3 only
# NOTE(review): the _ub suffix looks like the OpenVZ per-container variant - confirm.
net.ipv4.tcp_max_tw_buckets_ub = 65000


# Raise the limits used by Linux TCP buffer autotuning.
# tcp_wmem / tcp_rmem each take three values: min, default, max (bytes).
net.ipv4.tcp_wmem = 4096 65536 16777216
net.ipv4.tcp_rmem = 4096 87380 16777216
# Largest receive / send buffer a socket may request (bytes)
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
# Largest ancillary (option) buffer per socket (bytes)
net.core.optmem_max = 65536


# If your servers talk UDP, also raise these minimum buffer sizes
net.ipv4.udp_rmem_min = 8192
net.ipv4.udp_wmem_min = 8192

# Queue length (in datagrams) of a Unix domain datagram socket
net.unix.max_dgram_qlen = 1024

# Avoid "nf_conntrack: table full, dropping packet" errors in dmesg
# http://vds-admin.ru/unix-linux/oshibki-v-dmesg-vida-nfconntrack-table-full-dropping-packet
# Load the nf_conntrack module first if needed
net.netfilter.nf_conntrack_max = 1048576
net.nf_conntrack_max = 1048576


### MEMORY ###

# Swap less, but do not disable swapping entirely
vm.swappiness = 2

# Always allow overcommit: applications may request more virtual memory
# than the real RAM size (or the OpenVZ/LXC limits)
vm.overcommit_memory = 1

# Reclaim inode and dentry caches more aggressively to keep the OOM killer at bay
# https://major.io/2008/12/03/reducing-inode-and-dentry-caches-to-keep-oom-killer-at-bay/
vm.vfs_cache_pressure = 500

# Both values below are in centiseconds, i.e. 100 = 1 second
# Interval between background writeback runs for dirty data (3000 = 30 s)
vm.dirty_writeback_centisecs = 3000
# Age at which dirty data counts as old and is flushed from memory (18000 = 180 s)
vm.dirty_expire_centisecs = 18000

##
# Adjust vfs cache
# https://lonesysadmin.net/2013/12/22/better-linux-disk-caching-performance-vm-dirty_ratio/
# Decrease the dirty cache so data is flushed to disk sooner
vm.dirty_background_ratio = 5
vm.dirty_ratio = 10

#### PVE 3 ####

# Only on Proxmox 3.x with OpenVZ
ubc.dirty_ratio = 20
ubc.dirty_background_ratio = 10

# Isolate page cache for VPS.
ubc.pagecache_isolation = 1

### FileSystem ###

##
# Fix: "Failed to allocate directory watch: Too many open files"
#      seen on Proxmox 5 + LXC
#      and in VMs running Bitrix
#      (i.e. a lot of files)
# NOTE(review): max_user_instances is far larger than max_user_watches here;
# confirm the two values were not swapped by accident.

fs.inotify.max_user_instances = 16777216
fs.inotify.max_queued_events = 32000
fs.inotify.max_user_watches = 64000


### Security ###

# Forbid unprivileged users from calling bpf()
# http://www.opennet.ru/opennews/art.shtml?num=47792
kernel.unprivileged_bpf_disabled = 1

# Memory thresholds (bytes) for IP fragment reassembly, IPv4 and IPv6
# http://www.opennet.ru/opennews/art.shtml?num=49135
# https://community.rti.com/kb/how-can-i-improve-my-throughput-performance-linux
# NOTE(review): the two links may pull in opposite directions (hardening vs.
# throughput) - confirm that an 8 MiB high threshold is the intended trade-off.
net.ipv4.ipfrag_high_thresh = 8388608
net.ipv4.ipfrag_low_thresh = 196608
net.ipv6.ip6frag_high_thresh = 8388608
net.ipv6.ip6frag_low_thresh = 196608

# Turn off selective ACK and path-MTU probing
# NOTE(review): presumably a mitigation for the TCP SACK issue described at
# the link below; it may be unnecessary on patched kernels - confirm.
# http://www.opennet.ru/opennews/art.shtml?num=50889
net.ipv4.tcp_sack = 0
net.ipv4.tcp_mtu_probing = 0

# Prevent the TIME_WAIT attack (see RFC 1337).
net.ipv4.tcp_rfc1337 = 1


### OTHER ###

### PVE 3 - 6 kernels ###
# https://tweaked.io/guide/kernel/
# Don't migrate processes between CPU cores too often
# (value in nanoseconds, per the _ns suffix: 5000000 ns = 5 ms)
# kernel <= 5.4 (i.e. Proxmox 6)
kernel.sched_migration_cost_ns = 5000000
# Turn off the scheduler's automatic task grouping
# Kernel >= 2.6.38 (i.e. Proxmox 4+)
kernel.sched_autogroup_enabled = 0
	# NOTE: a tab-indented second copy of every setting in this file used to follow here; it repeated the same keys with the same values and was removed so each key is defined exactly once.