Skip to content

Instantly share code, notes, and snippets.

@zackbradys
Last active December 29, 2023 05:55
Show Gist options
  • Save zackbradys/141484743cb7b183b5135358c5dd61b3 to your computer and use it in GitHub Desktop.
Save zackbradys/141484743cb7b183b5135358c5dd61b3 to your computer and use it in GitHub Desktop.

Create RKE2 Cluster using Rancher Cluster Templates and Assumed Roles

View the repo: https://github.com/rancherfederal/rancher-cluster-templates

Setup the Rancher Management Cluster

Step 1: Create the IAM Policy

# Create the customer-managed IAM policy named aws-rgs-rancher-mgmt-policy.
# NOTE(review): the single statement below allows every IAM, KMS, EC2,
# Auto Scaling, Elastic Load Balancing, and ECR action on all resources
# ("Resource": "*"). Confirm this breadth is intended, or narrow it.
aws iam create-policy --policy-name aws-rgs-rancher-mgmt-policy --policy-document '{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "iam:*",
        "kms:*",
        "ec2:*",
        "autoscaling:*",
        "elasticloadbalancing:*",
        "ecr:*"
      ],
      "Resource": "*"
    }
  ]
}'

Step 2: Create the IAM Role

# Create the role that the Rancher manager EC2 instances assume through their
# instance profile. The trust policy names the EC2 service as the only trusted
# principal. A wildcard principal ("AWS": "*") would place no restriction on
# which AWS identity may call sts:AssumeRole on this highly privileged role.
aws iam create-role --role-name aws-rgs-rancher-mgmt-role --assume-role-policy-document '{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "ec2.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}'

Step 3: Attach the IAM Policy to the IAM Role

# Look up the ARN of the policy created in Step 1. --scope Local limits the
# listing to customer-managed policies, so AWS-managed policies are not scanned.
iam_policy_arn=$(aws iam list-policies --scope Local --query 'Policies[?PolicyName==`aws-rgs-rancher-mgmt-policy`].Arn' --output text)

# The variable is quoted so that an empty or unexpected lookup result is still
# passed as exactly one argument and produces a clear error from the CLI.
aws iam attach-role-policy --role-name aws-rgs-rancher-mgmt-role --policy-arn "$iam_policy_arn"

Step 4: Create the IAM Instance Profile

# Create the instance profile for the Rancher manager nodes.
aws iam create-instance-profile \
  --instance-profile-name aws-rgs-rancher-mgmt-profile

Step 5: Attach the IAM Role to the IAM Instance Profile

# Add the management role to the instance profile.
aws iam add-role-to-instance-profile \
  --instance-profile-name aws-rgs-rancher-mgmt-profile \
  --role-name aws-rgs-rancher-mgmt-role

Step 6: Attach the IAM Instance Profile to the Rancher Manager Nodes

# List the running instances (instance ID, Name tag, state) as a table
# so the Rancher manager nodes can be identified.
aws ec2 describe-instances \
  --filters Name=instance-state-name,Values=running \
  --query 'Reservations[*].Instances[*].[InstanceId, Tags[?Key==`Name`].Value[], State.Name]' \
  --output table

# Run this once per Rancher manager node (usually three nodes),
# replacing YourInstanceID with that node's instance ID.
aws ec2 associate-iam-instance-profile \
  --instance-id YourInstanceID \
  --iam-instance-profile Name=aws-rgs-rancher-mgmt-profile

Create the Downstream Cluster using Rancher Cluster Templates

Step 1: Create the values file (save it as values-aws.yaml, the file name the install command in Step 2 reads):

# Fill in every value marked "required" before installing.

# Supported values: amazonec2, azure, digitalocean, harvester, vsphere, custom
cloudprovider: 'amazonec2'

# Rancher Manager URL (given here as a bare hostname)
rancher:
  cattle:
    url: 'rancher.ranchers.io'

# Cluster-wide settings: name, registry, Kubernetes version, hardening flags,
# component arguments, registry mirrors, and the upgrade strategy.
cluster:
  # annotations:
    # key: value
  # labels:
    # key: value
  name: rke2-cluster-aws
  config:
    systemDefaultRegistry: rgcrprod.azurecr.us # default registry
    kubernetesVersion: v1.26.11+rke2r1 # https://github.com/rancher/rke2/releases
    localClusterAuthEndpoint:
      enabled: false
    # agentEnvVars:
      # - key:value
    cni: canal # canal, calico, cilium, multus,canal, multus,calico, multus,cilium
    docker: false
    disable_kube_proxy: false
    etcd_expose_metrics: false
    profile: cis-1.23 # cis-1.6, cis-1.23
    selinux: true
    secrets_encryption: true
    # Quoted on purpose: a bare 0600 is read by YAML 1.1 loaders as the octal
    # integer 384, which loses the file-mode notation.
    write_kubeconfig_mode: "0600"
    use_service_account_credentials: true
    protect_kernel_defaults: true
    cloud_provider_name: aws # aws, azure, harvester, vsphere
    # cloud_provider_config: '' # cloud provider config secret here (example: secret://fleet-default:cloudprovider)
    kube_controller_manager_arg: # (https://kubernetes.io/docs/reference/command-line-tools-reference/kube-controller-manager)
      - bind-address=127.0.0.1
      - use-service-account-credentials=true
      - tls-min-version=VersionTLS12
      - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
    kube_scheduler_arg: # (https://kubernetes.io/docs/reference/command-line-tools-reference/kube-scheduler)
      - tls-min-version=VersionTLS12
      - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
    kube_apiserver_arg: # (https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver)
      - tls-min-version=VersionTLS12
      - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
      - authorization-mode=RBAC,Node
      - anonymous-auth=false
      # The two files referenced below are written by the control plane
      # nodepool's userdata (write_files).
      - admission-control-config-file=/etc/rancher/rke2/rancher-pss.yaml
      - audit-policy-file=/etc/rancher/rke2/audit-policy.yaml
      - audit-log-mode=blocking-strict
      - audit-log-maxage=30
    kubelet_arg: # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet)
      - protect-kernel-defaults=true
      - read-only-port=0
      - authorization-mode=Webhook
      - streaming-connection-idle-timeout=5m
    registries:
      enabled: true
      configs:
        - name: rgcrprod.azurecr.us
          authConfigSecretName: registry-creds
          caBundle: ''
          insecureSkipVerify: false
          tlsSecretName: ''
      mirrors:
        - name: docker.io
          endpoints:
            - rgcrprod.azurecr.us
    upgradeStrategy:
      controlPlaneConcurrency: 10%
      controlPlaneDrainOptions:
        enabled: false
        # deleteEmptyDirData: true
        # disableEviction: false
        # force: false
        # gracePeriod: -1
        # ignoreDaemonSets: true
        # ignoreErrors: false
        # skipWaitForDeleteTimeoutSeconds: 0
        # timeout: 120
      workerConcurrency: 10%
      workerDrainOptions:
        enabled: false
        # deleteEmptyDirData: true
        # disableEviction: false
        # force: false
        # gracePeriod: -1
        # ignoreDaemonSets: true
        # ignoreErrors: false
        # skipWaitForDeleteTimeoutSeconds: 0
        # timeout: 120

# node and nodepool(s) values
nodepools:
  # Control plane pool: three nodes with the etcd and controlplane roles
  # enabled and the worker role disabled.
  - name: control-plane-nodes
    quantity: 3
    etcd: true
    controlplane: true
    worker: false
    # labels:
      # key: value
    # taints:
      # key: value
    paused: false
    # accessKey: # only required if not using cloudCredentialSecretName
    # secretKey: # only required if not using cloudCredentialSecretName
    # sessionToken: # only required if not using cloudCredentialSecretName
    ami: ami-079db87dc4c10ac91 # required (example: ami-123456789)
    # blockDurationMinutes: 0
    deviceName: /dev/sda1
    encryptEbsVolume: false
    # kmsKey: ''
    endpoint: ''
    # httpEndpoint: ''
    # httpTokens: ''
    iamInstanceProfile: 'aws-rgs-mgmt-cluster-iam-profile-control' # required (example: rancher-iam-instance-profile) - https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/kubernetes-clusters-in-rancher-setup/set-up-cloud-providers/amazon
    insecureTransport: false
    instanceType: m5.xlarge # required (example: m5.xlarge)
    region: us-east-1 # required (example: us-east-1)
    createSecurityGroup: false
    securityGroups: ['aws-rgs-mgmt-cluster-sg'] # https://ranchermanager.docs.rancher.com/getting-started/installation-and-upgrade/installation-requirements/port-requirements
    # openPort:
    # - "80"
    # - "443"
    keypairName: ''
    securityGroupReadonly: false
    sshKeyContents: ''
    subnetId: subnet-076aa666bf2adc2a8 # required (example: subnet-123456789)
    zone: a # required (example: a)
    monitoring: false
    privateAddressOnly: true
    requestSpotInstance: false
    # spotPrice: ''
    tags: provisioner,rancher,KeepRunning,true
    retries: 5
    rootSize: 128
    sshUser: ec2-user
    volumeType: gp3
    vpcId: vpc-0934dc8778cdf65db # required (example: vpc-123456789)
    useEbsOptimizedInstance: false
    usePrivateAddress: true
    # cloud-init user data for the control plane nodes. It installs OS packages,
    # writes /etc/sysctl.conf, the API server audit policy and the Pod Security
    # admission configuration, then runs the host preparation commands.
    # net.core.somaxconn is set exactly once (4096): when a key appears twice in
    # sysctl.conf the later entry wins, so an earlier 10000 entry had no effect.
    userdata: |
      #cloud-config
      package_update: true
      packages: ['iptables', 'container-selinux', 'libnetfilter_conntrack', 'libnfnetlink', 'libnftnl', 'policycoreutils-python-utils', 'cryptsetup', 'nfs-utils', 'iscsi-initiator-utils', 'zip', 'zstd', 'tree', 'jq']
      write_files:
      - path: /etc/sysctl.conf
        owner: root
        content: |
          vm.swappiness=0
          vm.panic_on_oom=0
          vm.overcommit_memory=1
          kernel.panic=10
          kernel.panic_on_oops=1
          vm.max_map_count = 262144
          net.ipv4.ip_local_port_range=1024 65000
          net.ipv4.tcp_tw_reuse=1
          net.ipv4.tcp_fin_timeout=15
          net.core.somaxconn=4096
          net.core.netdev_max_backlog=4096
          net.core.rmem_max=16777216
          net.core.wmem_max=16777216
          net.ipv4.tcp_max_syn_backlog=20480
          net.ipv4.tcp_max_tw_buckets=400000
          net.ipv4.tcp_no_metrics_save=1
          net.ipv4.tcp_rmem=4096 87380 16777216
          net.ipv4.tcp_syn_retries=2
          net.ipv4.tcp_synack_retries=2
          net.ipv4.tcp_wmem=4096 65536 16777216
          net.ipv4.neigh.default.gc_thresh1=8096
          net.ipv4.neigh.default.gc_thresh2=12288
          net.ipv4.neigh.default.gc_thresh3=16384
          net.ipv4.tcp_keepalive_time=600
          net.ipv4.ip_forward=1
          net.ipv6.conf.all.disable_ipv6 = 1
          net.ipv6.conf.default.disable_ipv6 = 1
          fs.inotify.max_user_instances=8192
          fs.inotify.max_user_watches=1048576
      - path: /etc/rancher/rke2/audit-policy.yaml
        owner: root
        content: |
          apiVersion: audit.k8s.io/v1
          kind: Policy
          metadata:
            name: rke2-audit-policy
          rules:
            - level: Metadata
              resources:
              - group: ""
                resources: ["secrets"]
            - level: RequestResponse
              resources:
              - group: ""
                resources: ["*"]
      - path: /etc/rancher/rke2/rancher-pss.yaml
        owner: root
        content: |
          apiVersion: apiserver.config.k8s.io/v1
          kind: AdmissionConfiguration
          plugins:
            - name: PodSecurity
              configuration:
                apiVersion: pod-security.admission.config.k8s.io/v1
                kind: PodSecurityConfiguration
                defaults:
                  enforce: "restricted"
                  enforce-version: "latest"
                  audit: "restricted"
                  audit-version: "latest"
                  warn: "restricted"
                  warn-version: "latest"
                exemptions:
                  usernames: []
                  runtimeClasses: []
                  namespaces: [calico-apiserver,
                              calico-system,
                              carbide-docs-system,
                              carbide-stigatron-system,
                              cattle-alerting,
                              cattle-csp-adapter-system,
                              cattle-elemental-system,
                              cattle-epinio-system,
                              cattle-externalip-system,
                              cattle-fleet-local-system,
                              cattle-fleet-system,
                              cattle-gatekeeper-system,
                              cattle-global-data,
                              cattle-global-nt,
                              cattle-impersonation-system,
                              cattle-istio,
                              cattle-istio-system,
                              cattle-logging,
                              cattle-logging-system,
                              cattle-monitoring-system,
                              cattle-neuvector-system,
                              cattle-prometheus,
                              cattle-provisioning-capi-system,
                              cattle-resources-system,
                              cattle-sriov-system,
                              cattle-system,
                              cattle-ui-plugin-system,
                              cattle-windows-gmsa-system,
                              cert-manager,
                              cis-operator-system,
                              fleet-default,
                              fleet-local,
                              ingress-nginx,
                              istio-system,
                              kube-node-lease,
                              kube-public,
                              kube-system,
                              longhorn-system,
                              rancher-alerting-drivers,
                              security-scan,
                              tigera-operator]
      runcmd:
      - sudo sysctl -p > /dev/null 2>&1
      - sudo curl -#OL https://github.com/rancher/rke2-selinux/releases/download/v0.17.stable.1/rke2-selinux-0.17-1.el8.noarch.rpm && sudo rpm -i rke2-selinux-0.17-1.el8.noarch.rpm
      - sudo echo "InitiatorName=$(/sbin/iscsi-iname)" > /etc/iscsi/initiatorname.iscsi && systemctl enable --now iscsid
      - sudo systemctl stop firewalld; systemctl disable firewalld; systemctl stop nm-cloud-setup; systemctl disable nm-cloud-setup; systemctl stop nm-cloud-setup.timer; systemctl disable nm-cloud-setup.timer
      - sudo echo -e "[keyfile]\nunmanaged-devices=interface-name:cali*;interface-name:flannel*" > /etc/NetworkManager/conf.d/rke2-canal.conf
      - sudo mkdir -p /opt/rke2-artifacts/ /etc/rancher/rke2/ /var/lib/rancher/rke2/server/manifests/
      - sudo useradd -r -c "etcd user" -s /sbin/nologin -M etcd -U
  # Worker pool: three nodes with only the worker role enabled
  # (etcd and controlplane roles disabled).
  - name: worker-nodes
    quantity: 3
    etcd: false
    controlplane: false
    worker: true
    # labels:
      # key: value
    # taints:
      # key: value
    paused: false
    # accessKey: # only required if not using cloudCredentialSecretName
    # secretKey: # only required if not using cloudCredentialSecretName
    # sessionToken: # only required if not using cloudCredentialSecretName
    ami: ami-079db87dc4c10ac91 # required (example: ami-123456789)
    deviceName: /dev/sda1
    encryptEbsVolume: false
    endpoint: ''
    iamInstanceProfile: 'aws-rgs-mgmt-cluster-iam-profile-worker' # required (example: rancher-iam-instance-profile)
    insecureTransport: false
    instanceType: m5.2xlarge # EC2 instance type (example: m5.2xlarge)
    region: us-east-1 # required (example: us-east-1)
    createSecurityGroup: false
    securityGroups: ['aws-rgs-mgmt-cluster-sg'] # https://ranchermanager.docs.rancher.com/getting-started/installation-and-upgrade/installation-requirements/port-requirements
    # openPort:
    # - "80"
    # - "443"
    keypairName: ''
    securityGroupReadonly: false
    sshKeyContents: ''
    subnetId: subnet-076aa666bf2adc2a8 # required (example: subnet-123456789)
    zone: a # required (example: a)
    monitoring: false
    privateAddressOnly: true
    requestSpotInstance: false
    # spotPrice: ''
    tags: provisioner,rancher,KeepRunning,true
    retries: 5
    rootSize: 256
    sshUser: ec2-user
    volumeType: gp3
    vpcId: vpc-0934dc8778cdf65db # required (example: vpc-123456789)
    useEbsOptimizedInstance: false
    usePrivateAddress: true
    # cloud-init user data for the worker nodes. It installs OS packages, writes
    # /etc/sysctl.conf, then runs the host preparation commands.
    # net.core.somaxconn is set exactly once (4096): when a key appears twice in
    # sysctl.conf the later entry wins, so an earlier 10000 entry had no effect.
    userdata: |
      #cloud-config
      package_update: true
      packages: ['iptables', 'container-selinux', 'libnetfilter_conntrack', 'libnfnetlink', 'libnftnl', 'policycoreutils-python-utils', 'cryptsetup', 'nfs-utils', 'iscsi-initiator-utils', 'zip', 'zstd', 'tree', 'jq']
      write_files:
      - path: /etc/sysctl.conf
        owner: root
        content: |
          vm.swappiness=0
          vm.panic_on_oom=0
          vm.overcommit_memory=1
          kernel.panic=10
          kernel.panic_on_oops=1
          vm.max_map_count = 262144
          net.ipv4.ip_local_port_range=1024 65000
          net.ipv4.tcp_tw_reuse=1
          net.ipv4.tcp_fin_timeout=15
          net.core.somaxconn=4096
          net.core.netdev_max_backlog=4096
          net.core.rmem_max=16777216
          net.core.wmem_max=16777216
          net.ipv4.tcp_max_syn_backlog=20480
          net.ipv4.tcp_max_tw_buckets=400000
          net.ipv4.tcp_no_metrics_save=1
          net.ipv4.tcp_rmem=4096 87380 16777216
          net.ipv4.tcp_syn_retries=2
          net.ipv4.tcp_synack_retries=2
          net.ipv4.tcp_wmem=4096 65536 16777216
          net.ipv4.neigh.default.gc_thresh1=8096
          net.ipv4.neigh.default.gc_thresh2=12288
          net.ipv4.neigh.default.gc_thresh3=16384
          net.ipv4.tcp_keepalive_time=600
          net.ipv4.ip_forward=1
          net.ipv6.conf.all.disable_ipv6 = 1
          net.ipv6.conf.default.disable_ipv6 = 1
          fs.inotify.max_user_instances=8192
          fs.inotify.max_user_watches=1048576
      runcmd:
      - sudo sysctl -p > /dev/null 2>&1
      - sudo curl -#OL https://github.com/rancher/rke2-selinux/releases/download/v0.17.stable.1/rke2-selinux-0.17-1.el8.noarch.rpm && sudo rpm -i rke2-selinux-0.17-1.el8.noarch.rpm
      - sudo echo "InitiatorName=$(/sbin/iscsi-iname)" > /etc/iscsi/initiatorname.iscsi && systemctl enable --now iscsid
      - sudo systemctl stop firewalld; systemctl disable firewalld; systemctl stop nm-cloud-setup; systemctl disable nm-cloud-setup; systemctl stop nm-cloud-setup.timer; systemctl disable nm-cloud-setup.timer
      - sudo echo -e "[keyfile]\nunmanaged-devices=interface-name:cali*;interface-name:flannel*" > /etc/NetworkManager/conf.d/rke2-canal.conf
      - sudo mkdir -p /etc/rancher/rke2/

# Add-on settings. All three add-ons (monitoring, longhorn, neuvector) are
# disabled here; each has commented-out `version` and `values` placeholders.
# NOTE(review): presumably `version` pins the add-on chart version and `values`
# carries its chart values - confirm against the chart's documentation.
addons:
  monitoring:
    enabled: false
    # version:
    # values:
      # values here

  longhorn:
    enabled: false
    # version:
    # values:
      # values here

  neuvector:
    enabled: false
    # version:
    # values:
      # values here

Step 2: Create the Cluster

# Install the "cluster" release (or upgrade it if it already exists) from the
# OCI chart into the fleet-default namespace, using values-aws.yaml.
helm upgrade --install cluster oci://ghcr.io/rancherfederal/rancher-cluster-templates \
  --namespace fleet-default \
  --values values-aws.yaml
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment