Last active
August 21, 2024 12:35
-
-
Save talawahtech/ce2fe1f6a3e3851d15e912e0a4e93734 to your computer and use it in GitHub Desktop.
CloudFormation template for "Extreme HTTP Performance Tuning" post: https://talawah.io/blog/extreme-http-performance-tuning-one-point-two-million/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AWSTemplateFormatVersion: '2010-09-09'
Description: Extreme Performance Tuning Benchmark Environment

# Stack inputs. Every parameter is consumed by the launch template that both
# benchmark instances are created from.
Parameters:
  # AMI ID, resolved at deploy time from an SSM parameter path.
  AmiId:
    Type: AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>
    Default: '/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2'
  # Name of an existing EC2 key pair to assign to the instances.
  InstanceKeyPair:
    Type: AWS::EC2::KeyPair::KeyName
  # Security group attached to the primary network interface.
  InstanceSecurityGroup:
    Type: AWS::EC2::SecurityGroup::Id
  # Subnet the primary network interface is placed in.
  InstanceSubnet:
    Type: AWS::EC2::Subnet::Id
  # Size of the /dev/xvda EBS volume.
  InstanceVolumeSize:
    Type: Number
    Default: 8
Resources:
  # Benchmark client. Built from the shared launch template; only the instance
  # type and the tags differ from the server.
  Client:
    Type: AWS::EC2::Instance
    Properties:
      InstanceType: 'c5n.4xlarge'
      Tags:
        - Key: 'Name'
          Value: 'extreme-client'
        - Key: 'Role' # Used by cloud-init script to conditionally apply changes to only the client or server
          Value: 'client'
      LaunchTemplate:
        LaunchTemplateId: !Ref 'LaunchTemplate'
        Version: !GetAtt 'LaunchTemplate.LatestVersionNumber'

  # Benchmark server. Built from the same launch template as the client.
  Server:
    Type: AWS::EC2::Instance
    Properties:
      InstanceType: 'c5n.xlarge'
      Tags:
        - Key: 'Name'
          Value: 'extreme-server'
        - Key: 'Role' # Used by cloud-init script to conditionally apply changes to only the client or server
          Value: 'server'
      LaunchTemplate:
        LaunchTemplateId: !Ref 'LaunchTemplate'
        Version: !GetAtt 'LaunchTemplate.LatestVersionNumber'

  # Cluster placement group referenced by the launch template, so both
  # instances are launched into it.
  ClusterPlacementGroup:
    Type: AWS::EC2::PlacementGroup
    Properties:
      Strategy: cluster

  # Allows 'aws ec2 describe-tags' to be called from the cloud-init script so it can differentiate client from server
  Ec2Role:
    Type: AWS::IAM::Role
    Properties:
      Path: /
      Policies:
        # NOTE: despite its name, this policy only grants ec2:DescribeTags.
        # The name is kept as-is so existing stacks are not modified.
        - PolicyName: 'AllowInstanceLogs'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action: [ 'ec2:DescribeTags' ]
                Resource: '*'
      AssumeRolePolicyDocument:
        Statement:
          - Effect: Allow
            Principal:
              Service: ['ec2.amazonaws.com']
            Action: ['sts:AssumeRole']

  # Instance profile wrapping Ec2Role so the launch template can attach it.
  Ec2InstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Path: /
      Roles: [!Ref 'Ec2Role']

  # Shared launch template: AMI, key pair, instance profile, placement group,
  # networking, storage and the cloud-init user data for both instances.
  LaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Ref 'AWS::StackName'
      LaunchTemplateData:
        ImageId: !Ref 'AmiId'
        KeyName: !Ref 'InstanceKeyPair'
        IamInstanceProfile:
          Arn: !GetAtt 'Ec2InstanceProfile.Arn'
        Placement:
          GroupName: !Ref 'ClusterPlacementGroup'
        NetworkInterfaces:
          - DeviceIndex: 0
            Ipv6AddressCount: 0 # Ensure that we don't get assigned any IPv6 addresses, even if it is the default for the subnet
            SubnetId: !Ref 'InstanceSubnet'
            Groups:
              - !Ref 'InstanceSecurityGroup'
        BlockDeviceMappings:
          - DeviceName: '/dev/xvda'
            Ebs:
              VolumeSize: !Ref 'InstanceVolumeSize'
              VolumeType: 'gp3'
        # Multipart MIME user data: part 1 is a cloud-config document, part 2 is
        # a bash script. The whole text goes through Fn::Sub, so shell variable
        # expansions in it are written with the CloudFormation "!" escape.
        UserData:
          Fn::Base64: !Sub |
            Content-Type: multipart/mixed; boundary="==BOUNDARY=="
            MIME-Version: 1.0

            --==BOUNDARY==
            Content-Type: text/cloud-config; charset="us-ascii"
            Content-Disposition: attachment; filename="cloud-config.txt"

            # Automatically reboot after cloud-init completes to apply kernel param changes
            power_state:
              mode: reboot
              message: Rebooting to apply new kernel params
              timeout: 10
              condition: True

            bootcmd:
              # These commands run on every boot, not just the first boot

              #### Disable iptables
              - modprobe -rv ip_tables

              ##### ENA driver configuration. Disable generic receive offloading
              - ethtool -K eth0 gro off

              ##### ENA driver configuration. Enable adaptive IRQ coalescing (server only)
              - export INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
              - echo INSTANCE_ID = ${!INSTANCE_ID}
              - export INSTANCE_ROLE=$(aws ec2 describe-tags --region ${AWS::Region} --filters "Name=resource-id,Values=${!INSTANCE_ID}" "Name=key,Values=Role" --output text | cut -f5)
              - echo INSTANCE_ROLE = ${!INSTANCE_ROLE}
              - if [ "${!INSTANCE_ROLE}" == "server" ]; then ethtool -C eth0 adaptive-rx on; fi
              - if [ "${!INSTANCE_ROLE}" == "server" ]; then ethtool -C eth0 tx-usecs 256; fi

              ##### Disable irqbalance and fix IRQs to cpus. Assumes # of irqs/queues = # of cpus!!!
              ## Note ${!} is the CF escape sequence for the bash equivalent and ${!!} is needed to get a literal ${!}
              ## sleep to give irqbalance time to shutdown before manually setting the values
              - systemctl stop irqbalance.service
              - echo sleeping
              - sleep 5
              - export IRQS=($(grep eth0 /proc/interrupts | awk '{print $1}' | tr -d :))
              - for i in ${!!IRQS[@]}; do echo $i > /proc/irq/${!IRQS[i]}/smp_affinity_list; done;
              - echo irq affinity
              - for i in ${!!IRQS[@]}; do cat /proc/irq/${!IRQS[i]}/smp_affinity_list; done;

              ##### Setup Transmit Packet Steering (XPS) to map queue x to cpu x for outgoing packets. Assumes # of queues = # of cpus!!!
              ## A hex bitmap is used in this case, not the cpu id so we raise 2 to the power of i and convert it to hex
              ## Note ${!} is the CF escape sequence for the bash equivalent and ${!!} is needed to get a literal ${!}
              - export TXQUEUES=($(ls -1qdv /sys/class/net/eth0/queues/tx-*))
              - for i in ${!!TXQUEUES[@]}; do printf '%x' $((2**i)) > ${!TXQUEUES[i]}/xps_cpus; done;
              - echo 'xps_cpus'
              - for i in ${!!TXQUEUES[@]}; do cat ${!TXQUEUES[i]}/xps_cpus; done;

              ## Stop dhclient and set address lifetime to "forever"
              - dhclient -x -pf /var/run/dhclient-eth0.pid
              - dhclient -x -pf /var/run/dhclient6-eth0.pid
              - ip addr change $( ip -4 addr show dev eth0 | grep 'inet' | awk '{ print $2 " brd " $4 " scope global"}') dev eth0 valid_lft forever preferred_lft forever

            packages:
              - git
              - gcc
              - make
              - htop
              - iperf3
              - dstat
              - pcp-system-tools
              - perf
              - iproute-tc

            --==BOUNDARY==
            Content-Type: text/x-shellscript; charset="us-ascii"
            Content-Disposition: attachment; filename="user-data-script.txt"

            #!/bin/bash

            # Configure sysctls
            cat > /etc/sysctl.d/90-extreme.conf <<- EOF
            vm.swappiness=0
            vm.dirty_ratio=80
            net.core.somaxconn=2048
            net.ipv4.tcp_max_syn_backlog=10000
            net.core.busy_poll=1
            net.core.default_qdisc=noqueue
            net.ipv4.tcp_congestion_control=reno
            EOF

            # Reload sysctl to pick up new configs.
            # The file is named explicitly because a bare "sysctl -p" only reads /etc/sysctl.conf
            sysctl -p /etc/sysctl.d/90-extreme.conf

            # Disable ssm agent. It doesn't really affect throughput, but any network activity can affect p99 and stdev for latency
            systemctl stop amazon-ssm-agent
            systemctl disable amazon-ssm-agent

            # Install docker and stress-ng from amazon-linux-extras
            amazon-linux-extras enable -y docker testing
            yum install -y docker stress-ng

            # Add the ec2-user to the docker group so you can execute Docker commands without using sudo
            usermod -a -G docker ec2-user

            # Configure and start docker with iptables support disabled
            mkdir -p /etc/systemd/system/docker.service.d/
            cat > /etc/systemd/system/docker.service.d/startup_options.conf <<- EOF
            [Service]
            ExecStart=
            ExecStart=/usr/bin/dockerd -H fd:// --bridge=none --iptables=false --ip-forward=false --live-restore
            EOF
            systemctl daemon-reload
            systemctl enable docker
            systemctl start docker

            # Build (t)wrk
            # Note that the luajit-devel package comes from the amazon-linux-extras repo for BCC
            amazon-linux-extras enable BCC
            yum clean metadata
            yum install -y openssl11-devel luajit-devel-2.1.0
            cd /home/ec2-user/
            git clone https://github.com/talawahtech/wrk --single-branch --branch twrk twrk
            cd twrk
            make WITH_LUAJIT=/usr WITH_OPENSSL=/usr CFLAGS="-I /usr/include/luajit-2.1"
            mv twrk /usr/local/bin/
            chown -R ec2-user:ec2-user /home/ec2-user/twrk/

            # Build and run the libreactor (round 20) docker container on the server
            cd /home/ec2-user/
            git clone https://github.com/TechEmpower/FrameworkBenchmarks --branch R20 --single-branch
            chown -R ec2-user:ec2-user /home/ec2-user/FrameworkBenchmarks/
            cd FrameworkBenchmarks/frameworks/C/libreactor/
            docker build . -f libreactor.dockerfile --network host -t libreactor
            docker build . -f libreactor-server.dockerfile --network host -t libreactor-server

            # Install Flamegraph tools
            cd /home/ec2-user/
            git clone https://github.com/brendangregg/FlameGraph
            chown -R ec2-user:ec2-user /home/ec2-user/FlameGraph/

            # Download custom palette.map
            wget -q https://gist.githubusercontent.com/talawahtech/b043e2dbf12af746de06b9b86c1a8b80/raw/ -O palette.map
            chown ec2-user:ec2-user /home/ec2-user/palette.map

            # Download network monitor script
            wget -q https://gist.githubusercontent.com/talawahtech/de78601f1201d9586ac19fff420024b8/raw/ -O netmonitor.sh
            chmod a+x netmonitor.sh
            mv netmonitor.sh /usr/local/bin/

            #### Set kernel params to disable speculative execution mitigations. Requires a reboot to take effect, which is handled above
            sed -i 's/^GRUB_CMDLINE_LINUX_DEFAULT="/&nospectre_v1 nospectre_v2 pti=off mds=off tsx_async_abort=off /' /etc/default/grub
            grub2-mkconfig -o /boot/grub2/grub.cfg

            #### Disable syscall auditing (but otherwise leave auditd functioning).
            echo "-a never,task" > /etc/audit/rules.d/disable-syscall-auditing.rules
            /sbin/augenrules --load

            --==BOUNDARY==--
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment