Skip to content

Instantly share code, notes, and snippets.

@cbattlegear
Last active April 22, 2019 17:01
Show Gist options
  • Save cbattlegear/b485189131c6c4e9fd027323a3caa37e to your computer and use it in GitHub Desktop.
Save cbattlegear/b485189131c6c4e9fd027323a3caa37e to your computer and use it in GitHub Desktop.
Azure Automation Webhook and Azure Function for monitoring and failing over a SQL Azure Failover Group.
<#
.DESCRIPTION
A runbook which fails over your Azure SQL Failover group to the secondary with allow data loss
This can be used for triggered emergency failovers.
.NOTES
AUTHOR: Cameron Battagler
LASTEDIT: Apr 18, 2019
#>
#Failover Group
$failoverGroupName = "YourFailoverGroup"
#Server 1 Resource Group and Server Name
$resourceGroup1 = "Region1ResourceGroup"
$serverName1 = "Region1LogicalServer"
#Server 2 Resource Group and Server Name
$resourceGroup2 = "Region2ResourceGroup"
$serverName2 = "Region2LogicalServer"
# Azure Automation RunAs Connection
$connectionName = "AzureRunAsConnection"
try
{
# Get the connection "AzureRunAsConnection "
$servicePrincipalConnection=Get-AutomationConnection -Name $connectionName
"Logging in to Azure..."
Add-AzAccount `
-ServicePrincipal `
-TenantId $servicePrincipalConnection.TenantId `
-ApplicationId $servicePrincipalConnection.ApplicationId `
-CertificateThumbprint $servicePrincipalConnection.CertificateThumbprint
}
catch {
if (!$servicePrincipalConnection)
{
$ErrorMessage = "Connection $connectionName not found."
throw $ErrorMessage
} else{
Write-Error -Message $_.Exception
throw $_.Exception
}
}
$failoverGroup1 = Get-AzSqlDatabaseFailoverGroup -ResourceGroupName $resourceGroup1 -ServerName $serverName1 -FailoverGroupName $failoverGroupName
$failoverGroup2 = Get-AzSqlDatabaseFailoverGroup -ResourceGroupName $resourceGroup2 -ServerName $serverName2 -FailoverGroupName $failoverGroupName
if ($failoverGroup1.ReplicationRole -eq "Secondary") {
#Failover to Server 1
$failoverGroup1 | Switch-AzSqlDatabaseFailoverGroup -AllowDataLoss
$serverName = $failoverGroup1.ServerName
Write-Output "Initiated failover to $serverName"
} else {
#Failover to Server 2
$failoverGroup2 | Switch-AzSqlDatabaseFailoverGroup -AllowDataLoss
$serverName = $failoverGroup2.ServerName
Write-Output "Initiated failover to $serverName"
}
#r "System.Data"
#r "Microsoft.WindowsAzure.Storage"
using System;
using System.Net.Http;
using System.Data;
using System.Data.SqlClient;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Table;
//Object to track failure data in Table Storage
public class FailureTracking : TableEntity
{
public FailureTracking(string Partition, string Row)
{
this.PartitionKey = Partition;
this.RowKey = Row;
}
public FailureTracking() { }
public int? FailCount { get; set; }
public int? SuccessCount { get; set; }
public bool? TrackSuccess { get; set; }
//Nullable datetime to prevent MinDate being inserted
public DateTime? LastFailover { get; set; }
}
private static HttpClient httpClient = new HttpClient();
public static async Task Run(TimerInfo myTimer, ILogger log)
{
//Set your failure threshold here
int failure_threshold = 5;
//Threshold of successful connections to reset failure counter
int reset_threshold = 5;
//Set your failover webhook URL in your Application Settings
string webhook_url = Environment.GetEnvironmentVariable("FailoverWebhookUrl",EnvironmentVariableTarget.Process);
//Get table storage credentials and connect to the table
var tablestr = Environment.GetEnvironmentVariable("AzureWebJobsStorage",EnvironmentVariableTarget.Process);
CloudStorageAccount storageAccount = CloudStorageAccount.Parse(tablestr);
CloudTableClient tableClient = storageAccount.CreateCloudTableClient();
CloudTable table = tableClient.GetTableReference("autofailovertracking");
await table.CreateIfNotExistsAsync();
log.LogInformation("Starting SQL Health Check");
//Get connection string from settings
var str = Environment.GetEnvironmentVariable("SQLAZURECONNSTR_DefaultSQLConnection",EnvironmentVariableTarget.Process);
using (SqlConnection conn = new SqlConnection(str))
{
try
{
//Connect to SQL
conn.Open();
string query = @"SELECT 1";
SqlCommand cmd = new SqlCommand(query, conn);
var output = await cmd.ExecuteScalarAsync();
//Get information from Table Storage
TableOperation retrieveOperation = TableOperation.Retrieve<FailureTracking>("Tracking", "Failure");
TableResult retrievedResult = await table.ExecuteAsync(retrieveOperation);
int? successcount = 0;
bool? tracksuccess = false;
FailureTracking _fail = new FailureTracking("Tracking", "Failure");
// Grab the number of failures (0 if the object isn't found)
if(retrievedResult.Result != null) {
successcount = ((FailureTracking)retrievedResult.Result)?.SuccessCount;
tracksuccess = ((FailureTracking)retrievedResult.Result)?.TrackSuccess;
}
if(tracksuccess != null && (bool)tracksuccess) {
_fail.SuccessCount = successcount + 1;
if(_fail.SuccessCount >= reset_threshold){
//Reset Fail count to 0 as we have had enough successful connections
_fail.FailCount = 0;
_fail.TrackSuccess = false;
TableOperation insertOperation = TableOperation.InsertOrMerge(_fail);
await table.ExecuteAsync(insertOperation);
} else {
//Record Success
TableOperation insertOperation = TableOperation.InsertOrMerge(_fail);
await table.ExecuteAsync(insertOperation);
}
}
} catch {
log.LogInformation("SQL connection or query failure");
//Get information from Table Storage
TableOperation retrieveOperation = TableOperation.Retrieve<FailureTracking>("Tracking", "Failure");
TableResult retrievedResult = await table.ExecuteAsync(retrieveOperation);
int? failcount = 0;
FailureTracking _fail = new FailureTracking("Tracking", "Failure");
// Grab the number of failures (0 if the object isn't found)
if(retrievedResult.Result != null) {
failcount = ((FailureTracking)retrievedResult.Result).FailCount;
}
_fail.FailCount = failcount + 1;
if(_fail.FailCount >= failure_threshold){
// Call azure automation to do the failover
log.LogInformation("Failing over to Secondary, threshold met");
var response = await httpClient.PostAsync(webhook_url, null);
//Set fail count to 0 and record time of failover
_fail.FailCount = 0;
_fail.TrackSuccess = false;
_fail.LastFailover = DateTime.Now;
TableOperation insertOperation = TableOperation.InsertOrMerge(_fail);
await table.ExecuteAsync(insertOperation);
} else {
//Record failure and make sure we track successful connections in case it was a transient fault
_fail.TrackSuccess = true;
TableOperation insertOperation = TableOperation.InsertOrMerge(_fail);
await table.ExecuteAsync(insertOperation);
}
}
conn.Close();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment