Last active
October 29, 2022 11:06
-
-
Save dbafromthecold/2c96994ea2e0d737bb45327a3413d95e to your computer and use it in GitHub Desktop.
Creating a VMWare pacemaker cluster to run SQL Server Availability Groups
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################################################################################
#
# Creating a pacemaker cluster and availability group on VMWare Virtual Machines - Andrew Pruski
# @dbafromthecold
# dbafromthecold@gmail.com
#
######################################################################################################################
# Installing SQL Server
######################################################################################################################
# This walkthrough targets a three node cluster running Ubuntu 20.04 in VMWare.

# Register the Microsoft package signing key and the SQL Server preview repository.
wget -qO- https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add -
sudo add-apt-repository "$(wget -qO- https://packages.microsoft.com/config/ubuntu/20.04/mssql-server-preview.list)"
sudo apt-get update

# Install SQL Server plus the HA components (the Pacemaker resource agent for AGs).
sudo apt-get install -y mssql-server
sudo apt-get install -y mssql-server-ha

# Configure SQL Server: run initial setup (edition / SA password), enable the
# SQL Agent, and enable Always On availability groups, then restart so the
# settings take effect.
sudo /opt/mssql/bin/mssql-conf setup
sudo /opt/mssql/bin/mssql-conf set sqlagent.enabled true
sudo /opt/mssql/bin/mssql-conf set hadr.hadrenabled 1
sudo systemctl restart mssql-server
systemctl status mssql-server
###################################################################################################################### | |
# Configuring the nodes | |
###################################################################################################################### | |
# add entries for servers into /etc/hosts | |
sudo vim /etc/hosts | |
<NODE1-IP-ADDRESS> <NODE1> | |
<NODE2-IP-ADDRESS> <NODE2> | |
<NODE3-IP-ADDRESS> <NODE3> | |
<LISTENER-IP-ADDRESS> <LISTENER NAME> | |
# install cluster components | |
sudo apt-get install -y pacemaker pacemaker-cli-utils crmsh resource-agents fence-agents csync2 | |
# run on primary server | |
sudo corosync-keygen | |
# copy to other servers | |
sudo scp /etc/corosync/authkey ubuntu@<NODE2>:~ | |
sudo scp /etc/corosync/authkey ubuntu@<NODE3>:~ | |
# run on other servers | |
sudo mv authkey /etc/corosync/authkey | |
# run on primary server | |
sudo vim /etc/corosync/corosync.conf | |
# Example /etc/corosync/corosync.conf - a three node cluster communicating over
# unicast UDP (udpu), with quorum provided by corosync_votequorum.
totem {
    version: 2
    cluster_name: <CLUSTER-NAME>
    transport: udpu
    crypto_cipher: none
    crypto_hash: none
}
logging {
    fileline: off
    to_stderr: yes
    to_logfile: yes
    logfile: /var/log/corosync/corosync.log
    to_syslog: yes
    debug: off
    logger_subsys {
        subsys: QUORUM
        debug: off
    }
}
quorum {
    provider: corosync_votequorum
}
nodelist {
    node {
        name: <NODE1>
        nodeid: 1
        ring0_addr: <NODE1-IP-ADDRESS>
    }
    node {
        name: <NODE2>
        nodeid: 2
        ring0_addr: <NODE2-IP-ADDRESS>
    }
    node {
        name: <NODE3>
        nodeid: 3
        ring0_addr: <NODE3-IP-ADDRESS>
    }
}
# Copy the corosync config from the primary to the other servers...
sudo scp /etc/corosync/corosync.conf ubuntu@<NODE2>:~
sudo scp /etc/corosync/corosync.conf ubuntu@<NODE3>:~

# ...and on those servers move it into place.
sudo mv corosync.conf /etc/corosync/

# Restart the cluster stack on ALL servers and confirm the cluster is up.
sudo systemctl restart pacemaker corosync
sudo crm status
------------------------------------------------------------------------------------------------------------------------
-- Create availability group in SQL Server
------------------------------------------------------------------------------------------------------------------------
-- Ensure the AlwaysOn_health extended events session starts automatically
-- (run on ALL servers).
ALTER EVENT SESSION AlwaysOn_health ON SERVER WITH (STARTUP_STATE = ON);
GO

-- On the PRIMARY node: create a master key and a certificate used to
-- authenticate the mirroring endpoints, then back the certificate up so it
-- can be copied to the other nodes.
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'PASSWORD';
CREATE CERTIFICATE dbm_certificate WITH SUBJECT = 'dbm';
BACKUP CERTIFICATE dbm_certificate
    TO FILE = '/var/opt/mssql/data/dbm_certificate.cer'
    WITH PRIVATE KEY (
        FILE = '/var/opt/mssql/data/dbm_certificate.pvk',
        ENCRYPTION BY PASSWORD = 'PASSWORD'
    );
# Copy the certificate backup files from the primary to the OTHER nodes.
# NOTE: the second copy must target <NODE3> - copying back to <NODE1> (the
# primary itself) would leave node 3 without the certificate.
sudo su
cd /var/opt/mssql/data
scp dbm_certificate.* ubuntu@<NODE2>:~
scp dbm_certificate.* ubuntu@<NODE3>:~
exit

# On the other nodes: move the backups into the SQL Server data directory and
# give the mssql account ownership so SQL Server can read them.
sudo su
cp /home/ubuntu/dbm_certificate.* /var/opt/mssql/data/
chown mssql:mssql /var/opt/mssql/data/dbm_certificate.*
exit
-- On the OTHER nodes: create a master key and restore the certificate from
-- the backup taken on the primary (same private-key password).
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'PASSWORD';
CREATE CERTIFICATE dbm_certificate
    FROM FILE = '/var/opt/mssql/data/dbm_certificate.cer'
    WITH PRIVATE KEY (
        FILE = '/var/opt/mssql/data/dbm_certificate.pvk',
        DECRYPTION BY PASSWORD = 'PASSWORD'
    );

-- Create and start the database mirroring endpoint on ALL nodes; replicas
-- authenticate to each other with the certificate created above.
CREATE ENDPOINT [Hadr_endpoint]
    AS TCP (LISTENER_PORT = 5022)
    FOR DATABASE_MIRRORING (
        ROLE = ALL,
        AUTHENTICATION = CERTIFICATE dbm_certificate,
        ENCRYPTION = REQUIRED ALGORITHM AES
    );
ALTER ENDPOINT [Hadr_endpoint] STATE = STARTED;
# create login for pacemaker on all nodes | |
USE [master] | |
GO | |
CREATE LOGIN [pacemakerLogin] with PASSWORD= N'PASSWORD'; | |
ALTER SERVER ROLE [sysadmin] ADD MEMBER [pacemakerLogin]; | |
GO | |
# create password file for pacemaker on all nodes | |
echo 'pacemakerLogin' >> ~/pacemaker-passwd | |
echo 'PASSWORD' >> ~/pacemaker-passwd | |
sudo mv ~/pacemaker-passwd /var/opt/mssql/secrets/passwd | |
sudo chown root:root /var/opt/mssql/secrets/passwd | |
sudo chmod 400 /var/opt/mssql/secrets/passwd | |
-- On the PRIMARY node: create the availability group with an EXTERNAL cluster
-- type (managed by Pacemaker, not WSFC) and automatic seeding so databases
-- are created on the secondaries without manual restores.
CREATE AVAILABILITY GROUP [ag1]
    WITH (CLUSTER_TYPE = EXTERNAL)
    FOR REPLICA ON
        N'<NODE1>'
        WITH (
            ENDPOINT_URL = N'tcp://<NODE1>:5022',
            AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,
            FAILOVER_MODE = EXTERNAL,
            SEEDING_MODE = AUTOMATIC
        ),
        N'<NODE2>'
        WITH (
            ENDPOINT_URL = N'tcp://<NODE2>:5022',
            AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,
            FAILOVER_MODE = EXTERNAL,
            SEEDING_MODE = AUTOMATIC
        ),
        N'<NODE3>'
        WITH (
            ENDPOINT_URL = N'tcp://<NODE3>:5022',
            AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,
            FAILOVER_MODE = EXTERNAL,
            SEEDING_MODE = AUTOMATIC
        );
ALTER AVAILABILITY GROUP [ag1] GRANT CREATE ANY DATABASE;
GO

-- On the OTHER nodes: join the availability group and allow automatic
-- seeding to create databases locally.
ALTER AVAILABILITY GROUP [ag1] JOIN WITH (CLUSTER_TYPE = EXTERNAL);
ALTER AVAILABILITY GROUP [ag1] GRANT CREATE ANY DATABASE;

-- Grant the Pacemaker login the permissions the mssql resource agent needs
-- to monitor and fail over the AG (run on ALL nodes).
GRANT ALTER, CONTROL, VIEW DEFINITION ON AVAILABILITY GROUP::ag1 TO [pacemakerLogin];
GRANT VIEW SERVER STATE TO [pacemakerLogin];
GO
######################################################################################################################
# Create availability group resource in pacemaker
######################################################################################################################
# Disable STONITH for now - it is configured properly later on.
sudo crm configure property stonith-enabled=false

# Create the availability group resource via the crmsh interactive shell:
# a multi-state (master/slave) resource with one master (the AG primary)
# cloned across all three nodes.
sudo crm
configure
primitive ag1_cluster \
    ocf:mssql:ag \
    params ag_name="ag1" \
    meta failure-timeout=60s \
    op start timeout=60s \
    op stop timeout=60s \
    op promote timeout=60s \
    op demote timeout=10s \
    op monitor timeout=60s interval=10s \
    op monitor timeout=60s on-fail=demote interval=11s role="Master" \
    op monitor timeout=60s interval=12s role="Slave" \
    op notify timeout=60s
ms ms-ag1 ag1_cluster \
    meta master-max="1" master-node-max="1" clone-max="3" \
    clone-node-max="1" notify="true"
commit

# Check the status of the availability group resource and the cluster.
sudo crm resource status ms-ag1
sudo crm status
------------------------------------------------------------------------------------------------------------------------
-- Test auto-seeding in the availability group
------------------------------------------------------------------------------------------------------------------------
-- Create a test database on the primary. A database needs a full backup (and
-- here a log backup) before it can join an AG; automatic seeding then creates
-- it on the secondaries.
USE [master];
GO
CREATE DATABASE [testdatabase1];
GO
BACKUP DATABASE [testdatabase1] TO DISK = N'/var/opt/mssql/data/testdatabase1.bak';
BACKUP LOG [testdatabase1] TO DISK = N'/var/opt/mssql/data/testdatabase1.trn';
GO
ALTER AVAILABILITY GROUP [ag1] ADD DATABASE [testdatabase1];
GO
###################################################################################################################### | |
# create listener | |
###################################################################################################################### | |
# create listener resource in pacemaker | |
sudo crm configure primitive virtualip \ | |
ocf:heartbeat:IPaddr2 \ | |
params ip=<LISTENER IP ADDRESS> | |
# confirm cluster status | |
sudo crm status | |
# create listener | |
ALTER AVAILABILITY GROUP [ag1] ADD LISTENER N'<LISTENER>' ( | |
WITH IP | |
((N'<LISTENER IP ADDRESS>', N'<NETMASK>')), PORT=1433); | |
GO | |
# configure colocation and ordering constraints | |
sudo crm configure colocation ag-with-listener INFINITY: virtualip ms-ag1:Master | |
sudo crm configure order ag-before-listener Mandatory: ms-ag1:promote virtualip:start | |
# test failover of availability group | |
sudo crm resource move ms-ag1 <NODE2> | |
# confirm cluster status | |
sudo crm status | |
# view constraints in cluster | |
sudo crm resource constraints ms-ag1 | |
# delete move constraint | |
sudo crm configure delete cli-prefer-ms-ag1 | |
###################################################################################################################### | |
# create STONITH resource | |
###################################################################################################################### | |
# list stonith devices | |
crm ra list stonith | |
# test using fence_vmware_rest stonith devices | |
fence_vmware_rest -a <VSPHERE IP ADDRESS> -l <LOGIN> -p <PASSWORD> --ssl-insecure -z -o list | egrep "(<NODE1>|<NODE2>|<NODE3>)" | |
fence_vmware_rest -a <VSPHERE IP ADDRESS> -l <LOGIN> -p <PASSWORD> --ssl-insecure -z -o status -n <NODE1> | |
# create fence_vmware_rest stonith resource | |
sudo crm configure primitive fence_vmware stonith:fence_vmware_rest \ | |
params \ | |
ipaddr="<VSPHERE IP ADDRESS>" \ | |
action=reboot \ | |
login="<LOGIN>" \ | |
passwd="<PASSWORD>" \ | |
ssl=1 ssl_insecure=1 \ | |
pcmk_reboot_timeout=900 \ | |
power_timeout=60 \ | |
op monitor \ | |
interval=3600 \ | |
timeout=120 | |
# configure stonith properties | |
sudo crm configure property cluster-recheck-interval=2min | |
sudo crm configure property start-failure-is-fatal=true | |
sudo crm configure property stonith-timeout=900 | |
sudo crm configure property concurrent-fencing=true | |
sudo crm configure property stonith-enabled=true | |
# delete stonith resource - whilst testing | |
#sudo crm configure property stonith-enabled=false | |
#sudo crm resource stop fence_vmware | |
#sudo crm configure delete fence_vmware | |
# confirm cluster status | |
sudo crm status | |
# test stonith | |
sudo crm cluster stop # run on target node | |
sudo crm status | |
sudo stonith_admin --reboot <NODE3> | |
sudo crm status |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment