Last active
July 28, 2021 07:19
-
-
Save daniel0x00/47523a08bdd658528e4639a3da838e7e to your computer and use it in GitHub Desktop.
PowerShell Azure Function that takes an array object as input and outputs a JSON array with multiple chunks of Splunk-HEC compliant objects.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using namespace System.Net | |
param($Request, $TriggerMetadata) | |
### | |
# PowerShell serverless Function that receives an HTTP POST payload and converts it to a Splunk HEC grouped payload, optionally adding selected DotNotation properties. | |
### | |
# Author: Daniel Ferreira (@daniel0x00) | |
# License: BSD 3-Clause | |
# Source: https://gist.github.com/daniel0x00/47523a08bdd658528e4639a3da838e7e | |
### | |
# Use-case: use this Azure Function to convert bulk JSON input coming from 3rd party integrations to Splunk HTTP Event Collector (HEC) compliant payloads. | |
# Useful in conjunction with Azure Logic Apps / Azure Functions / AWS Lambda / Google Functions as an integration pipeline for Cloud-native data. ` | |
# E.g. data coming from Azure Event Hubs streaming, Cloud asset inventory from AWS/Google/Azure, from ServiceNow asset inventory, etc. | |
# Note the 'DotNotation' functionality will add a 'dotnotation' object to the original JSON payload that you'll be able to ` | |
# use with TERM() and PREFIX() Splunk's directives for easy and powerful searches and direct usage of |tstats command. | |
# You can also output only the 'dotnotation' property to save Splunk license. This is only recommended for data you aim to use only with |tstats. | |
### | |
### | |
### Input HTTP POST Body schema: | |
### | |
# { | |
# "InputObject": [<object>,<object>], --> input array object. | |
# "IndexValue": "<string>", --> static value for 'index' field. | |
# "SourcetypeValue": "<string>", --> static value for 'sourcetype' field. | |
# "SourceRegexValue": "<string>", --> regex to fill 'source' field. E.g: \"resourceId\":\\s?\"(?<source>[\\w\\-\\.]+)\" | |
# "HostRegexValue": "<string>", --> regex to fill 'host' field. E.g: \"callerIpAddress\":\\s?\"(?<host>[\\w\\-\\.\\:]+)\" | |
# "TimeRegexValue": "<string>", --> regex to fill 'time' field. E.g: \"time\":\\s?\"(?<time>[\\w\\-\\.\\:]+)\". If EnableTimeExtraction=false, leave this value as empty string. | |
# "EnableTimeExtraction": false, --> Indicates if time field is expected. If 'true', the regex seen on 'TimeRegexValue' will be applied to capture the time. If 'false', the UTC time will be used. | |
# "OutputArrayChunks": 1000, --> Indicates how many events will be grouped in chunks by the output object. | |
# "DotNotation": true, --> Indicates if function will add 'dotnotation' property | |
# "DotNotationOnly": false, --> Indicates if function will return only 'dotnotation' properties. | |
# "DotNotationExpandProperty": "<string>", --> Indicates if function will expand only this property for the 'dotnotation' object. Only accepts 1 field. E.g. "*" for all or "<string>" for specific one. Use '*' by default. | |
# "DotNotationDesiredProperties": false, --> Indicates if function will return only certain 'dotnotation' properties instead of all properties. | |
# "DotNotationDesiredPropertiesList": "<string>,<string>" --> Indicates which 'dotnotation' properties will be returned. Note you cannot use spaces between the comma separator. | |
# } | |
### | |
### Output JSON schema | |
### Note: To forward to Splunk HEC, you must loop over each chunk and join its child objects together using the `\n` character. | |
### This means Splunk receives newline-delimited JSON objects rather than a single valid JSON document. This is expected. | |
### To concatenate chunk's objects in Azure Logic Apps, use the following helper: @join(items('For_each'),'\n') | |
### | |
# { | |
# "items": { | |
# "items": { | |
# "properties": { | |
# "event": { | |
# "properties": {}, | |
# "type": "object" | |
# }, | |
# "host": { | |
# "type": "string" | |
# }, | |
# "index": { | |
# "type": "string" | |
# }, | |
# "source": { | |
# "type": "string" | |
# }, | |
# "sourcetype": { | |
# "type": "string" | |
# }, | |
# "time": { | |
# "type": "integer" | |
# } | |
# }, | |
# "required": [ | |
# "event", | |
# "index", | |
# "sourcetype", | |
# "source", | |
# "host", | |
# "time" | |
# ], | |
# "type": "object" | |
# }, | |
# "type": "array" | |
# }, | |
# "type": "array" | |
# } | |
# ConvertTo-DotNotation: | |
function ConvertTo-DotNotation {
    # PowerShell cmdlet that converts a PowerShell/JSON object into a dot.notation array.
    # For Splunk use cases, this cmdlet enables data to be TERM() and PREFIX() ready.
    # Author: Daniel Ferreira (@daniel0x00)
    # License: BSD 3-Clause
    # Source: https://gist.github.com/daniel0x00/15f8871f2c0aca803e7f60ae0a1f42c1
    # Note: any improvement opportunity to make this function "a 1-liner" producing same results is very welcomed.
    #
    # Parameters:
    #   InputObject - object whose properties are flattened. Accepts pipeline input.
    #   Parent      - prefix prepended to every emitted key. The recursive calls use it to
    #                 carry the path of the enclosing object/array (it already ends with '.').
    # Output:
    #   One "<dotted.key>=<value>" string per leaf value. Array elements are numbered from 1
    #   ("key.1", "key.2", ...), except single-element scalar arrays, which are emitted as
    #   "key=<value>" with no index.
    [CmdletBinding()]
    [OutputType([System.Array])]
    param(
        [Parameter(Position=0, Mandatory=$true, ValueFromPipeline=$true)]
        [PSCustomObject] $InputObject,
        [Parameter(Position=1, Mandatory=$false, ValueFromPipeline=$false)]
        [string] $Parent=''
    )
    process {
        foreach ($item in $InputObject.PSObject.Properties) {
            # Full dotted path of this property. $Parent is never modified, so sibling
            # properties always get the same prefix, whatever characters their names contain.
            $name = "$Parent$($item.Name)"
            $typeName = $item.TypeNameOfValue
            $value = $item.Value

            # Types that need expansion (objects, object arrays, Select-Object results) as
            # opposed to values that can be rendered directly as text.
            $isObjectLike = ($typeName -match 'Object') -or ($typeName -match 'Microsoft.PowerShell') -or ($typeName -match 'Selected.System')

            # Set when the value must be emitted as a flat list of scalar elements.
            $emitAsScalarArray = $false

            if (-not $isObjectLike) {
                if ($typeName -match 'String\[\]$') {
                    # Match string array:
                    $emitAsScalarArray = $true
                }
                else {
                    # Match any other type of object that can be converted into text:
                    "{0}={1}" -f $name, $value
                }
            }
            elseif (($null -eq $value) -or ($value.Count -eq 0)) {
                # Match null values and empty collections:
                "{0}=null" -f $name
            }
            elseif ($typeName -match 'Object\[\]') {
                # Match array. The first element decides whether the array holds objects
                # (recurse into each one) or scalars (emit each one). The null check avoids
                # calling GetType() on a null first element.
                $first = $value[0]
                if (($null -ne $first) -and ($first.GetType() -match 'Object')) {
                    for ($x = 0; $x -lt $value.Length; $x++) {
                        ConvertTo-DotNotation -InputObject ($value[$x]) -Parent "$name.$($x+1)."
                    }
                }
                else {
                    $emitAsScalarArray = $true
                }
            }
            else {
                # Match objects:
                ConvertTo-DotNotation -InputObject $value -Parent "$name."
            }

            if ($emitAsScalarArray) {
                if ($value.Length -eq 1) {
                    # Single element: no positional suffix.
                    "{0}={1}" -f $name, $value[0]
                }
                else {
                    for ($x = 0; $x -lt $value.Length; $x++) {
                        "{0}={1}" -f "$name.$($x+1)", $value[$x]
                    }
                }
            }
        }
    }
}
# HTTP entry point: reads the POST body described in the file header, wraps every input
# object in a Splunk HEC envelope (event/index/sourcetype/source/host/time), groups the
# envelopes in chunks of OutputArrayChunks items and returns the chunks as a JSON array
# of arrays. Any failure is logged and reported as HTTP 500 with an empty body.

# Return:
$StatusCode = [httpstatuscode]::OK
# Set output vars:
$Output = [string]::empty
try {
    # Parse input:
    $InputObject = $Request.Body.InputObject
    $IndexValue = $Request.Body.IndexValue
    $SourcetypeValue = $Request.Body.SourcetypeValue
    $SourceRegexValue = $Request.Body.SourceRegexValue
    $HostRegexValue = $Request.Body.HostRegexValue
    # Read the dedicated time regex; it must not be confused with the host regex.
    $TimeRegexValue = $Request.Body.TimeRegexValue
    # Fall back to 1000 events per chunk when the caller omits the value; a missing
    # value would otherwise make the chunk grouping below fail.
    $OutputArrayChunks = $Request.Body.OutputArrayChunks ?? 1000
    $EnableTimeExtraction = $Request.Body.EnableTimeExtraction ?? $false
    $DotNotation = $Request.Body.DotNotation ?? $false
    $DotNotationOnly = $Request.Body.DotNotationOnly ?? $false
    $DotNotationExpandProperty = $Request.Body.DotNotationExpandProperty ?? "*"
    $DotNotationDesiredProperties = $Request.Body.DotNotationDesiredProperties ?? $false
    $DotNotationDesiredPropertiesList = $Request.Body.DotNotationDesiredPropertiesList ?? "*"

    # Const:
    $JSONDepth = 10
    $epochStart = Get-Date '01/01/1970'

    ## Output:
    # Output the array in JSON format sliced in chunks
    # The intention of this output format is for upstream systems to group, split or distribute the JSON array in the most convenient way.
    # E.g. On a 10,000-row input, divide the output into 10 outputs of 1000 rows each and send each grouped payload to Splunk HEC 5 seconds apart.
    $OutputArray = [System.Collections.Generic.List[Object]]::new()

    # Group output
    $counter = [pscustomobject] @{ Value = 0 }

    # Time value used for every event unless time extraction is enabled:
    $Time = ([int](New-TimeSpan -Start $epochStart -End (Get-Date).ToUniversalTime()).TotalSeconds)

    # Calculated property that builds the 'dotnotation' value for one event:
    #  1. optionally narrow the event to a single expanded property,
    #  2. optionally keep only the desired properties,
    #  3. round-trip through JSON to normalise the object, then flatten it.
    $dotNotationColumn = @{
        n = 'dotnotation'
        e = {
            $dotSource = $_
            if ($DotNotationExpandProperty -ne '*') {
                $dotSource = $dotSource | Select-Object -ExpandProperty $DotNotationExpandProperty
            }
            if ($DotNotationDesiredProperties) {
                $dotSource = $dotSource | Select-Object -Property ($DotNotationDesiredPropertiesList -split ',')
            }
            $dotSource | ConvertTo-JSON -Compress -Depth $JSONDepth | ConvertFrom-JSON | ConvertTo-DotNotation
        }
    }

    $GroupedOutput = $InputObject.ForEach({
        # Named $eventObject because $Event is a PowerShell automatic variable.
        $eventObject = [PSCustomObject]$_

        # Add 'dotnotation' property (alone, or next to the original properties):
        if ($DotNotation) {
            if ($DotNotationOnly) { $eventObject = $eventObject | Select-Object -Property $dotNotationColumn }
            else { $eventObject = $eventObject | Select-Object -Property '*', $dotNotationColumn }
        }

        ## Save raw event in JSON to be able to apply regex:
        $rawEvent = $eventObject | Select-Object -ExcludeProperty properties,resources | ConvertTo-JSON -Depth $JSONDepth -Compress

        # New values of 'source', 'host' and 'time':
        $SourceValueOverwritten = [string]([regex]::Match($rawEvent,$SourceRegexValue).groups['source'].value).ToLower()
        $HostValueOverwritten = [string]([regex]::Match($rawEvent,$HostRegexValue).groups['host'].value).ToLower()
        $eventTime = $Time
        if ($EnableTimeExtraction) {
            $timeText = [string](([regex]::Match($rawEvent,$TimeRegexValue)).groups['time'].value)
            $eventTime = [int](New-TimeSpan -Start $epochStart -End (Get-Date $timeText).ToUniversalTime()).TotalSeconds
        }

        [PSCustomObject]@{
            event = $eventObject;
            index = $IndexValue;
            sourcetype = $SourcetypeValue;
            source = $SourceValueOverwritten;
            host = $HostValueOverwritten;
            time = $eventTime;
        }
    }) | Group-Object -Property { [math]::Floor($counter.Value++ / $OutputArrayChunks) }

    # Build output array (one entry per chunk):
    foreach ($item in $GroupedOutput) { $null = $OutputArray.Add($item.group) }
    $Output = $OutputArray | ConvertTo-Json -Depth $JSONDepth -AsArray -Compress
}
catch {
    Write-Verbose "Catch triggered: $_" -Verbose
    $StatusCode = [httpstatuscode]::InternalServerError
}
finally {
    # Always answer the HTTP request, even after a failure.
    Push-OutputBinding -Name Response -Value ([HttpResponseContext]@{
        ContentType = 'application/json'
        StatusCode = $StatusCode
        Body = $Output
    })
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment