Azure Automation runbook that exports data from one or more Log Analytics workspaces across all subscriptions in one or more management groups
#Requires -Version 5.0
#Requires -Modules AzureServicePrincipalAccount
<#
=============================================================================================================================
AUTHOR: Tao Yang
DATE: 06/05/2019
Version: 2.0
Comment: Export Log Analytics logs from one or more workspaces (using the Kusto query language and the Log Analytics query API)
Requirements:
- The Azure AD Service Principal must be assigned at least the Contributor role on the target management groups or subscriptions
Change Log:
- 1.0 (10/10/2017): Initial release
- 2.0 (06/05/2019): add support for querying multiple workspaces located in all subscriptions in given management groups
=============================================================================================================================
#>
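# Example invocation (a minimal sketch for local testing; the script name, connection name,
# workspace ID, management group and output path below are hypothetical - when published as a
# runbook these parameters are supplied at start time by Azure Automation):
#   .\Export-LogAnalyticsData.ps1 -AzureConnectionName 'AzureSPConnection' `
#     -WorkspaceId '00000000-0000-0000-0000-000000000000' `
#     -managementGroupName 'contoso-root' `
#     -SearchQuery 'Heartbeat' -LogFriendlyName 'Heartbeat' `
#     -IntervalMinute 60 -HourlySequenceNo 1 `
#     -OutputDir 'C:\Export' -OutputFormat 'CSV' -Zip $true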
[CmdletBinding()]
Param (
[Parameter(Mandatory = $true)][String]$AzureConnectionName,
[Parameter(Mandatory = $true)]
[ValidateScript({
try {
[System.Guid]::Parse($_) | Out-Null
$true
} catch {
$false
}
})]
[string]$WorkspaceId,
[Parameter(Mandatory = $false)][ValidateNotNullOrEmpty()][string[]]$managementGroupName,
[Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$SearchQuery,
[Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$LogFriendlyName,
[Parameter(Mandatory = $true)][ValidateSet(5,10,15,20,30,60)][int]$IntervalMinute,
[Parameter(Mandatory = $true)][ValidateSet(1,2,3,4,6,12)][int]$HourlySequenceNo,
[Parameter(Mandatory = $false)][ValidateSet(5,10,15,20,30,60)][int]$IndexingMinuteOffset = 10,
[Parameter(Mandatory = $false)][ValidateRange(1,600)][int]$Timeout = 180,
[Parameter(Mandatory = $true)][ValidateScript({Test-Path $_})][string]$OutputDir,
[Parameter(Mandatory = $false)][ValidateSet('JSON', 'CSV')][string]$OutputFormat = 'JSON',
[Parameter(Mandatory = $false)][ValidateSet('Unknown', 'String', 'Unicode', 'BigEndianUnicode', 'UTF8', 'UTF7', 'UTF32', 'ASCII', 'Default', 'OEM')][string]$Encoding = 'ASCII',
[Parameter(Mandatory = $false)][ValidateRange(1000,10000)][int]$MaximumRowPerFile = 5000,
[Parameter(Mandatory = $false)][boolean]$Zip = $false
)
#region functions
Function ConvertFrom-LogAnalyticsJson
{
[CmdletBinding()]
[OutputType([Object])]
Param (
[parameter(Mandatory=$true)]
[string]$JSON
)
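# The Log Analytics query API returns JSON of the form
# { "tables": [ { "name", "columns": [ { "name", "type" } ], "rows": [ [...] ] } ] }.
# The loops below flatten every row into a PSObject keyed by column name.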
$data = ConvertFrom-Json $JSON
$count = 0
foreach ($table in $data.Tables) {
$count += $table.Rows.Count
}
$objectView = New-Object object[] $count
$i = 0;
foreach ($table in $data.Tables) {
foreach ($row in $table.Rows) {
# Create a dictionary of properties
$properties = @{}
for ($columnNum=0; $columnNum -lt $table.Columns.Count; $columnNum++) {
$properties[$table.Columns[$columnNum].name] = $row[$columnNum]
}
# Then create a PSObject from it. This seems to be *much* faster than using Add-Member
$objectView[$i] = (New-Object PSObject -Property $properties)
$null = $i++
}
}
$objectView
}
Function Invoke-KustoSearch
{
Param (
[Parameter(Mandatory = $true)][string]$AADToken,
[Parameter(Mandatory = $true)]
[ValidateScript({
try {
[System.Guid]::Parse($_) | Out-Null
$true
} catch {
$false
}
})]
[string]$WorkspaceId,
[Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$SearchQuery,
[Parameter(Mandatory = $false)][ValidateNotNullOrEmpty()][string[]]$additionalWorkspaceIds,
[Parameter(Mandatory = $false)][ValidateRange(1,600)][int]$Timeout = 180,
[Parameter(Mandatory = $false)][ValidateNotNullOrEmpty()][string]$ISO8601TimeSpan
)
#Constructing queries
$arrSearchResults = @()
$RowNumber = 0
$InitialQuery = "$SearchQuery | sort by TimeGenerated asc | extend rn=row_number()"
$subsequentQueryTemplate = "$InitialQuery | where rn > {0}"
$APIResourceURI = 'https://api.loganalytics.io'
$APISearchURI = "$APIResourceURI/v1/workspaces/$WorkspaceId/query"
#request header
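# 'Prefer: wait=<seconds>' asks the query API to wait up to that long for the query to
# complete before responding; 'v1-response=true' requests the response shape (tables with
# columns and rows) that ConvertFrom-LogAnalyticsJson expects.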
$RequestHeader = @{
'Authorization' = $AADToken
"Content-Type" = 'application/json'
'prefer' = "wait=$Timeout, v1-response=true"
}
#initial query request
Write-Verbose "Invoking initial search request using query `"$InitialQuery`""
Write-Verbose "Timespan: '$ISO8601TimeSpan'"
#Construct REST request body
$RequestBody = @{
"query" = $InitialQuery
}
If ($PSBoundParameters.ContainsKey('additionalWorkspaceIds'))
{
$RequestBody.Add("workspaces", $additionalWorkspaceIds)
}
If ($PSBoundParameters.ContainsKey('ISO8601TimeSpan'))
{
$RequestBody.Add('timespan', $ISO8601TimeSpan)
}
$RequestBodyJSON = ConvertTo-Json -InputObject $RequestBody
Write-verbose "Requestbody:"
Write-verbose $RequestBodyJSON
#Invoke search REST request
$SearchRequest = Invoke-WebRequest -UseBasicParsing -Uri $APISearchURI -Headers $RequestHeader -Body $RequestBodyJSON -Method Post -Verbose
#process result
Write-Verbose "Parsing Log Analytics Query REST API Results."
Write-verbose $(Get-date)
$arrSearchResults += ConvertFrom-LogAnalyticsJson $SearchRequest.Content
Write-verbose $(Get-date)
#Check if subsequent requests are required
$objResponse = ConvertFrom-JSON $SearchRequest.Content
If ($null -ne $objResponse.error)
{
Write-Verbose 'Initial query did not complete successfully. Potentially hitting the API throttling limits.'
Write-Verbose " - Error Code: $($objResponse.error.code)"
Write-Verbose " - Error Message: $($objResponse.error.message)"
Write-Verbose " - Inner Error code: $($objResponse.error.details.innererror.code)"
Write-Verbose " - Inner Error Message: $($objResponse.error.details.innererror.message)"
$iRepeat = 0
If ($objresponse.error.code -ieq 'partialerror')
{
$iRepeat ++
Write-Verbose "Partial Error occurred, subsequent queries required. Repeat count: $iRepeat."
$bQueryCompleted = $false
Do
{
Write-Verbose "Getting the row number for the last row returned from all previous requests."
$RowNumber = $RowNumber + $arrSearchResults.count
$subsequentQuery = [string]::Format($subsequentQueryTemplate, $RowNumber)
Write-Verbose "Performing subsequent query using query `"$subsequentQuery`""
$SubsequentRequestBody = @{
"query" = $subsequentQuery
}
If ($PSBoundParameters.ContainsKey('additionalWorkspaceIds'))
{
$SubsequentRequestBody.Add("workspaces", $additionalWorkspaceIds)
}
If ($PSBoundParameters.ContainsKey('ISO8601TimeSpan'))
{
$SubsequentRequestBody.Add('timespan', $ISO8601TimeSpan)
}
$SubsequentRequestBodyJSON = ConvertTo-Json -InputObject $SubsequentRequestBody
Write-Verbose "Subsequent Request body:"
Write-Verbose $SubsequentRequestBodyJSON
#Invoke search REST request
$SubsequentSearchRequest = Invoke-WebRequest -UseBasicParsing -Uri $APISearchURI -Headers $RequestHeader -Body $SubsequentRequestBodyJSON -Method Post
#process result
Write-Verbose "Parsing Log Analytics Query REST API Results."
$arrSearchResults += ConvertFrom-LogAnalyticsJson $SubsequentSearchRequest.Content
#Check if subsequent requests are required
$objSubsequentResponse = ConvertFrom-JSON $SubsequentSearchRequest.content
if ($objSubsequentResponse.error.code -ine 'partialerror') {$bQueryCompleted = $true}
} while (!$bQueryCompleted)
Write-Verbose "Subsequent queries completed successful."
}
} else {
Write-Verbose "Initial search query retrieved the entire result set."
}
Write-Verbose "result record count: $($arrSearchResults.count)"
$arrSearchResults
}
Function Get-QueryTimeSpan
{
[CmdletBinding()]
Param (
[Parameter(Mandatory = $true)][ValidateSet(5,10,15,20,30,60)][int]$IntervalMinute,
[Parameter(Mandatory = $true)][ValidateSet(1,2,3,4,6,12)][int]$HourlySequenceNo,
[Parameter(Mandatory = $false)][ValidateSet(5,10,15,20,30,60)][int]$IndexingMinuteOffset = 10
)
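# Returns an ISO 8601 time span ("<start>/PT<interval>M") for the query window, shifted back
# by one interval plus the indexing offset so recently ingested records have been indexed
# before they are queried. Worked example (illustrative values): with IntervalMinute = 15,
# HourlySequenceNo = 2 and IndexingMinuteOffset = 10, run at 09:xx UTC, the offset is
# (2-1)*15 - 15 - 10 = -10 minutes, so the window starts at 08:50 UTC and the function
# returns "<yyyy-MM-dd>T08:50:00Z/PT15M".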
$UTCNow = (get-date).ToUniversalTime()
$UTCStartofHour = Get-Date -Year $UTCNow.Year -Month $UTCNow.Month -Day $UTCNow.Day -Hour $UTCNow.Hour -Minute 0 -Second 0 -Millisecond 0
$UTCStartofHour = [DateTime]::SpecifyKind($UTCStartofHour, [DateTimeKind]::Utc)
$StartTimeOffSet = (($HourlySequenceNo -1) * $IntervalMinute) - $IntervalMinute - $IndexingMinuteOffset
$StartTime = $UTCStartofHour.AddMinutes($StartTimeOffSet)
$StartYear = $StartTime.Year
$StartMonth = '{0:D2}' -f $StartTime.Month
$StartDay = '{0:D2}' -f $StartTime.Day
$StartHour = '{0:D2}' -f $StartTime.Hour
$StartMinute = '{0:D2}' -f $StartTime.Minute
$ISO8601TimeSpanTemplate = "{0}-{1}-{2}T{3}:{4}:00Z/PT{5}M"
$ISO8601TimeSpan = [System.String]::Format($ISO8601TimeSpanTemplate, $StartYear, $StartMonth, $StartDay, $StartHour, $StartMinute, $IntervalMinute)
$ISO8601TimeSpan
}
Function Export-ResultToFile
{
[CmdletBinding()]
Param (
[Parameter(Mandatory = $true)][psobject[]]$Logs,
[Parameter(Mandatory = $true)][ValidateScript({Test-Path $_})][string]$OutputDir,
[Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$FileName,
[Parameter(Mandatory = $true)][ValidateSet('JSON', 'CSV')][string]$OutputFormat,
[Parameter(Mandatory = $true)][ValidateSet('Unknown', 'String', 'Unicode', 'BigEndianUnicode', 'UTF8', 'UTF7', 'UTF32', 'ASCII', 'Default', 'OEM')][string]$Encoding
)
$OutputFilePath = Join-Path $OutputDir $FileName
Write-Verbose "Exporting to '$OutputFilePath'..."
Switch ($OutputFormat)
{
'CSV' {$Logs | Export-CSV -LiteralPath $OutputFilePath -NoTypeInformation -Force -Encoding $Encoding}
'JSON' {ConvertTo-JSON -InputObject $Logs -Depth 99 | Out-File $OutputFilePath -Force -Encoding $Encoding}
}
$OutputFilePath
}
Function GetAzureSubscriptionsbyMG
{
[CmdletBinding()]
Param (
[Parameter(Mandatory = $true)][Hashtable]$RequestHeaders,
[Parameter(Mandatory = $true)][string[]]$ManagementGroupName
)
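# Calls the ARM getEntities endpoint to list the entities under each named management group
# and keeps only the IDs of entities of type '/subscriptions'.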
$subs = New-Object System.Collections.ArrayList
Foreach ($MG in $ManagementGroupName)
{
$URI = "https://management.azure.com/providers/microsoft.management/getEntities?api-version=2018-03-01-preview&`$filter=`"name eq '$MG'`""
Try {
$GetEntitiesResponse = Invoke-WebRequest -UseBasicParsing -Uri $URI -Headers $RequestHeaders -Method POST
If ($GetEntitiesResponse.StatusCode -ge 200 -and $GetEntitiesResponse.StatusCode -le 299)
{
foreach ($item in (ConvertFrom-Json $GetEntitiesResponse.Content).value)
{
if ($item.type -ieq '/subscriptions' -and (!$subs.Contains($item.name)))
{
[void]$subs.Add($item.name)
}
}
}
} Catch {
Throw $_.Exception
}
}
$Subs
}
Function InvokeResourceGraphQuery
{
Param (
[Parameter(Mandatory = $true)][Hashtable]$RequestHeaders,
[Parameter(Mandatory = $true)][string]$Query,
[Parameter(Mandatory = $true)][string[]]$subscriptions
)
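# Runs an Azure Resource Graph query scoped to the supplied subscription IDs. The
# 2018-09-01-preview API returns results as a table (columns and rows), which is flattened
# into PSObjects in the same way as ConvertFrom-LogAnalyticsJson.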
$URI = "https://management.azure.com/providers/Microsoft.ResourceGraph/resources?api-version=2018-09-01-preview"
#Construct Request body
$objRequestBody = new-object System.Management.Automation.PSObject -Property @{subscriptions = $subscriptions;query = $query}
$jsonRequestBody = ConvertTo-Json -InputObject $objRequestBody -Depth 5
Try {
$SearchRequest = Invoke-WebRequest -UseBasicParsing -Uri $URI -Headers $RequestHeaders -Body $jsonRequestBody -Method POST -ContentType "application/json"
If ($SearchRequest.StatusCode -ge 200 -and $SearchRequest.StatusCode -le 299)
{
#parse search result
$data = ConvertFrom-Json $SearchRequest.content
$count = 0
foreach ($item in $data.data) {
$count += $item.Rows.Count
}
$SearchResult = New-Object object[] $count
$i = 0;
foreach ($item in $data.data) {
foreach ($row in $item.Rows) {
# Create a dictionary of properties
$properties = @{}
for ($columnNum=0; $columnNum -lt $item.Columns.Count; $columnNum++) {
$properties[$item.Columns[$columnNum].name] = $row[$columnNum]
}
# Then create a PSObject from it. This seems to be *much* faster than using Add-Member
$SearchResult[$i] = (New-Object PSObject -Property $properties)
$null = $i++
}
}
}
} Catch {
Throw $_.Exception
}
$SearchResult
}
#endregion
#region variables
$APIResourceURI = 'https://api.loganalytics.io'
$foreignWorkspaceIds = @()
#endregion
#region main
$AzureConnection = Get-AutomationConnection -Name $AzureConnectionName
#Get AAD Token
Write-Verbose "Requesting Azure AD oAuth tokens"
$ARMToken = Get-AzureADToken -AzureServicePrincipalConnection $AzureConnection
$ARMRequestHeaders = @{'Authorization' = $ARMToken}
$AADToken = Get-AzureADToken -AzureServicePrincipalConnection $AzureConnection -ResourceURI $APIResourceURI
#Get Log Analytics workspaces that are in scope (within the MG)
#if management group name(s) are not specified, use the Tenant Root MG
If (!$PSBoundParameters.ContainsKey('managementGroupName'))
{
$managementGroupName = $AzureConnection.TenantId
}
#Get all subscription IDs under the MG hierarchy. These IDs are needed for the Resource Graph query
$AzureSubs = GetAzureSubscriptionsbyMG -RequestHeaders $ARMRequestHeaders -ManagementGroupName $managementGroupName
$ofs = ", "
Write-verbose "Azure subscriptions in management group $managementGroupName`: $AzureSubs"
#Get all Log Analytics workspaces in eligible subscriptions
$getLAWorkspaceQuery = "where type =~ 'microsoft.operationalinsights/workspaces'"
$workspaces = InvokeResourceGraphQuery -RequestHeaders $ARMRequestHeaders -Query $getLAWorkspaceQuery -subscriptions $AzureSubs
Foreach ($workspace in $workspaces)
{
if ($workspace.properties.customerId -ine $workspaceId)
{
$foreignWorkspaceIds += $workspace.properties.customerId
}
}
#Work out the search time span
$ISO8601TimeSpan = Get-QueryTimeSpan -IntervalMinute $IntervalMinute -HourlySequenceNo $HourlySequenceNo -IndexingMinuteOffset $IndexingMinuteOffset
Write-verbose "Log Analytics search request ISO 8601 time span: '$ISO8601TimeSpan'."
#Invoke search API
Write-Verbose "Invoking search request. Search query: `"$SearchQuery`""
$queryParams = @{
AADToken = $AADToken
WorkspaceId = $WorkspaceId
SearchQuery = $SearchQuery
ISO8601TimeSpan = $ISO8601TimeSpan
Timeout = $Timeout
}
If ($foreignWorkspaceIds)
{
$queryParams.Add('additionalWorkspaceIds', $foreignWorkspaceIds)
}
$SearchResult = Invoke-KustoSearch @queryParams
#Export logs to files
Write-Output "Number of $LogFriendlyName events retrieved: $($searchresult.count)"
$bAllExported = $false
$iExportCount = 0
$totalExported = 0
$arrExportFiles = @()
Do {
if ($SearchResult.count -gt $MaximumRowPerFile)
{
#chop results to multiple files
$ExportSet = $searchresult | Select-Object -First $MaximumRowPerFile -Skip ($MaximumRowPerFile * $iExportCount)
$iExportCount ++
#Export Log
$FileName = "$LogFriendlyName-$($ISO8601TimeSpan.replace(':', '.').split('Z/')[0])`-$iExportCount`.$OutputFormat"
Write-output "Exporting Batch No. $iExportCount with $($ExportSet.count) rows to '$FileName'"
$arrExportFiles += Export-ResultToFile -Logs $ExportSet -OutputDir $OutputDir -FileName $FileName -OutputFormat $OutputFormat -Encoding $Encoding -Verbose
$totalExported = $totalExported + $ExportSet.Count
If ($totalExported -eq $SearchResult.Count)
{
$bAllExported = $true
}
} else {
#dump all results to a single file
$FileName = "$LogFriendlyName-$($ISO8601TimeSpan.replace(':', '.').split('Z/')[0])`-1.$OutputFormat"
Write-output "Exporting entire result set with $($searchresult.count) rows to '$FileName'"
$arrExportFiles += Export-ResultToFile -Logs $searchresult -OutputDir $OutputDir -FileName $FileName -OutputFormat $OutputFormat -Encoding $Encoding
$bAllExported = $true
}
} While (!$bAllExported)
Write-output '', "All logs are exported to '$OutputDir'."
#Zip files
If ($Zip)
{
$ZipFileName = "$LogFriendlyName-$($ISO8601TimeSpan.replace(':', '.').split('Z/')[0])`.zip"
Write-output '', "Creating zip file"
$CreateZip = Compress-Archive -LiteralPath $arrExportFiles -DestinationPath $(Join-Path $OutputDir $ZipFileName) -CompressionLevel Optimal -Force
Write-Output "Deleting raw files"
$DeleteFiles = remove-item -Path $arrExportFiles -Force
}
Write-Output "Total files created: $($arrExportFiles.Count)"
Write-output "Done!"
#endregion