Last active
April 22, 2019 17:01
-
-
Save cbattlegear/b485189131c6c4e9fd027323a3caa37e to your computer and use it in GitHub Desktop.
Azure Automation Webhook and Azure Function for monitoring and failing over a SQL Azure Failover Group.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<# | |
.DESCRIPTION | |
A runbook which fails over your Azure SQL Failover group to the secondary with allow data loss | |
This can be used for triggered emergency failovers. | |
.NOTES | |
AUTHOR: Cameron Battagler | |
LASTEDIT: Apr 18, 2019 | |
#> | |
#Failover Group | |
$failoverGroupName = "YourFailoverGroup" | |
#Server 1 Resource Group and Server Name | |
$resourceGroup1 = "Region1ResourceGroup" | |
$serverName1 = "Region1LogicalServer" | |
#Server 2 Resource Group and Server Name | |
$resourceGroup2 = "Region2ResourceGroup" | |
$serverName2 = "Region2LogicalServer" | |
# Azure Automation RunAs Connection | |
$connectionName = "AzureRunAsConnection" | |
try | |
{ | |
# Get the connection "AzureRunAsConnection " | |
$servicePrincipalConnection=Get-AutomationConnection -Name $connectionName | |
"Logging in to Azure..." | |
Add-AzAccount ` | |
-ServicePrincipal ` | |
-TenantId $servicePrincipalConnection.TenantId ` | |
-ApplicationId $servicePrincipalConnection.ApplicationId ` | |
-CertificateThumbprint $servicePrincipalConnection.CertificateThumbprint | |
} | |
catch { | |
if (!$servicePrincipalConnection) | |
{ | |
$ErrorMessage = "Connection $connectionName not found." | |
throw $ErrorMessage | |
} else{ | |
Write-Error -Message $_.Exception | |
throw $_.Exception | |
} | |
} | |
$failoverGroup1 = Get-AzSqlDatabaseFailoverGroup -ResourceGroupName $resourceGroup1 -ServerName $serverName1 -FailoverGroupName $failoverGroupName | |
$failoverGroup2 = Get-AzSqlDatabaseFailoverGroup -ResourceGroupName $resourceGroup2 -ServerName $serverName2 -FailoverGroupName $failoverGroupName | |
if ($failoverGroup1.ReplicationRole -eq "Secondary") { | |
#Failover to Server 1 | |
$failoverGroup1 | Switch-AzSqlDatabaseFailoverGroup -AllowDataLoss | |
$serverName = $failoverGroup1.ServerName | |
Write-Output "Initiated failover to $serverName" | |
} else { | |
#Failover to Server 2 | |
$failoverGroup2 | Switch-AzSqlDatabaseFailoverGroup -AllowDataLoss | |
$serverName = $failoverGroup2.ServerName | |
Write-Output "Initiated failover to $serverName" | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "System.Data" | |
#r "Microsoft.WindowsAzure.Storage" | |
using System; | |
using System.Net.Http; | |
using System.Data; | |
using System.Data.SqlClient; | |
using Microsoft.WindowsAzure.Storage; | |
using Microsoft.WindowsAzure.Storage.Table; | |
//Object to track failure data in Table Storage | |
public class FailureTracking : TableEntity | |
{ | |
public FailureTracking(string Partition, string Row) | |
{ | |
this.PartitionKey = Partition; | |
this.RowKey = Row; | |
} | |
public FailureTracking() { } | |
public int? FailCount { get; set; } | |
public int? SuccessCount { get; set; } | |
public bool? TrackSuccess { get; set; } | |
//Nullable datetime to prevent MinDate being inserted | |
public DateTime? LastFailover { get; set; } | |
} | |
private static HttpClient httpClient = new HttpClient(); | |
public static async Task Run(TimerInfo myTimer, ILogger log) | |
{ | |
//Set your failure threshold here | |
int failure_threshold = 5; | |
//Threshold of successful connections to reset failure counter | |
int reset_threshold = 5; | |
//Set your failover webhook URL in your Application Settings | |
string webhook_url = Environment.GetEnvironmentVariable("FailoverWebhookUrl",EnvironmentVariableTarget.Process); | |
//Get table storage credentials and connect to the table | |
var tablestr = Environment.GetEnvironmentVariable("AzureWebJobsStorage",EnvironmentVariableTarget.Process); | |
CloudStorageAccount storageAccount = CloudStorageAccount.Parse(tablestr); | |
CloudTableClient tableClient = storageAccount.CreateCloudTableClient(); | |
CloudTable table = tableClient.GetTableReference("autofailovertracking"); | |
await table.CreateIfNotExistsAsync(); | |
log.LogInformation("Starting SQL Health Check"); | |
//Get connection string from settings | |
var str = Environment.GetEnvironmentVariable("SQLAZURECONNSTR_DefaultSQLConnection",EnvironmentVariableTarget.Process); | |
using (SqlConnection conn = new SqlConnection(str)) | |
{ | |
try | |
{ | |
//Connect to SQL | |
conn.Open(); | |
string query = @"SELECT 1"; | |
SqlCommand cmd = new SqlCommand(query, conn); | |
var output = await cmd.ExecuteScalarAsync(); | |
//Get information from Table Storage | |
TableOperation retrieveOperation = TableOperation.Retrieve<FailureTracking>("Tracking", "Failure"); | |
TableResult retrievedResult = await table.ExecuteAsync(retrieveOperation); | |
int? successcount = 0; | |
bool? tracksuccess = false; | |
FailureTracking _fail = new FailureTracking("Tracking", "Failure"); | |
// Grab the number of failures (0 if the object isn't found) | |
if(retrievedResult.Result != null) { | |
successcount = ((FailureTracking)retrievedResult.Result)?.SuccessCount; | |
tracksuccess = ((FailureTracking)retrievedResult.Result)?.TrackSuccess; | |
} | |
if(tracksuccess != null && (bool)tracksuccess) { | |
_fail.SuccessCount = successcount + 1; | |
if(_fail.SuccessCount >= reset_threshold){ | |
//Reset Fail count to 0 as we have had enough successful connections | |
_fail.FailCount = 0; | |
_fail.TrackSuccess = false; | |
TableOperation insertOperation = TableOperation.InsertOrMerge(_fail); | |
await table.ExecuteAsync(insertOperation); | |
} else { | |
//Record Success | |
TableOperation insertOperation = TableOperation.InsertOrMerge(_fail); | |
await table.ExecuteAsync(insertOperation); | |
} | |
} | |
} catch { | |
log.LogInformation("SQL connection or query failure"); | |
//Get information from Table Storage | |
TableOperation retrieveOperation = TableOperation.Retrieve<FailureTracking>("Tracking", "Failure"); | |
TableResult retrievedResult = await table.ExecuteAsync(retrieveOperation); | |
int? failcount = 0; | |
FailureTracking _fail = new FailureTracking("Tracking", "Failure"); | |
// Grab the number of failures (0 if the object isn't found) | |
if(retrievedResult.Result != null) { | |
failcount = ((FailureTracking)retrievedResult.Result).FailCount; | |
} | |
_fail.FailCount = failcount + 1; | |
if(_fail.FailCount >= failure_threshold){ | |
// Call azure automation to do the failover | |
log.LogInformation("Failing over to Secondary, threshold met"); | |
var response = await httpClient.PostAsync(webhook_url, null); | |
//Set fail count to 0 and record time of failover | |
_fail.FailCount = 0; | |
_fail.TrackSuccess = false; | |
_fail.LastFailover = DateTime.Now; | |
TableOperation insertOperation = TableOperation.InsertOrMerge(_fail); | |
await table.ExecuteAsync(insertOperation); | |
} else { | |
//Record failure and make sure we track successful connections in case it was a transient fault | |
_fail.TrackSuccess = true; | |
TableOperation insertOperation = TableOperation.InsertOrMerge(_fail); | |
await table.ExecuteAsync(insertOperation); | |
} | |
} | |
conn.Close(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment