// Query Notes
// -------------
// 1. This query displays detailed information for Event Hub triggered functions, using telemetry emitted by the Event Hubs extension version 4.2.0 and greater.
//
// 2. The data is only emitted in the correct format if batched dispatch is used, i.e. the function accepts multiple events per execution. This is the recommended way to write Event Hub triggered functions. For an example, see the documentation: https://docs.microsoft.com/en-us/azure/azure-functions/functions-bindings-event-hubs-trigger?tabs=csharp
//
// 3. If sampling is enabled in Application Insights, there might be gaps in the data. To configure sampling, see https://docs.microsoft.com/en-us/azure/azure-functions/configure-monitoring?tabs=v2#configure-sampling. A query to estimate how much telemetry is retained is included after the first query below.
//
// 4. The dispatchTimeMilliseconds value approximates the length of time between when the event was written to the event hub and when it was picked up by the Function App for processing. Note that:
// a) dispatchTimeMilliseconds could be negative or otherwise inaccurate due to clock drift between the Event Hubs service and the function app
// b) Event Hubs partitions are processed sequentially. A message won't be dispatched to function code for processing until all previous messages have been processed. Monitor the execution time of your functions, as longer execution times will cause dispatch delays (see the execution duration query further below).
// c) The calculation uses the enqueueTime of the *first* message in the batch. Dispatch time might be lower for other messages in the batch.
// d) dispatchTimeMilliseconds is based on the point in time the "Trigger Details" trace is logged, i.e. when the batch is dispatched to the function, not when function execution completes
//
// 5. Keep in mind that sequence numbers are per-partition, and duplicate processing can occur because Event Hubs does not guarantee exactly-once delivery. A query that flags batches dispatched more than once is included at the end.
//
traces
| where message startswith "Trigger Details: Parti"
| parse message with * "tionId: " partitionId:string ", Offset: " offsetStart:string "-" offsetEnd:string", EnqueueTimeUtc: " enqueueTimeStart:datetime "Z-" enqueueTimeEnd:datetime ", SequenceNumber: " sequenceNumberStart:string "-" sequenceNumberEnd:string ", Count: " messageCount:int
| extend dispatchTimeMilliseconds = (timestamp - enqueueTimeStart) / 1ms
| project timestamp, cloud_RoleInstance, operation_Name, processId = customDimensions.ProcessId, partitionId, messageCount, sequenceNumberStart, sequenceNumberEnd, enqueueTimeStart, enqueueTimeEnd, dispatchTimeMilliseconds
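// Estimate how much of the trigger telemetry is retained when sampling is enabled (see note 3 above).
// Each record's itemCount column reflects how many original telemetry items it represents, so
// 100 / avg(itemCount) approximates the retained percentage. This is a hedged sketch: values close
// to 100 mean little or no sampling; lower values mean the queries here are working with gaps.
traces
| where message startswith "Trigger Details: Parti"
| summarize retainedPercentage = 100.0 / avg(itemCount) by bin(timestamp, 1h)
| render timechart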
// This query visualizes the 50th and 90th percentile event dispatch latency for a given Event Hub triggered function. See the first query above for more details and notes.
traces
//| where operation_Name == "<<ENTER THE NAME OF YOUR FUNCTION HERE>>"
| where operation_Name == "ReplicateReadingsToTopic"
| where message startswith "Trigger Details: Parti"
| parse message with * "tionId: " partitionId:string ", Offset: " offsetStart:string "-" offsetEnd:string", EnqueueTimeUtc: " enqueueTimeStart:datetime "Z-" enqueueTimeEnd:datetime ", SequenceNumber: " sequenceNumberStart:string "-" sequenceNumberEnd:string ", Count: " messageCount:int
| extend dispatchTimeMilliseconds = (timestamp - enqueueTimeStart) / 1ms
| summarize percentiles(dispatchTimeMilliseconds, 50, 90) by bin(timestamp, 5m)
| render timechart
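// Track function execution duration alongside dispatch latency (see note 4b above). Long executions
// delay dispatch of later batches on the same partition. This sketch assumes the default Functions
// integration, where each execution is recorded in the requests table with duration in milliseconds.
requests
//| where operation_Name == "<<ENTER THE NAME OF YOUR FUNCTION HERE>>"
| summarize percentiles(duration, 50, 90, 99) by bin(timestamp, 5m)
| render timechart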
// Similar to above, but a summary view
traces
| where message startswith "Trigger Details: Parti"
| parse message with * "tionId: " partitionId:string ", Offset: " offsetStart:string "-" offsetEnd:string", EnqueueTimeUtc: " enqueueTimeStart:datetime "Z-" enqueueTimeEnd:datetime ", SequenceNumber: " sequenceNumberStart:string "-" sequenceNumberEnd:string ", Count: " messageCount:int
| extend dispatchTimeMilliseconds = (timestamp - enqueueTimeStart) / 1ms
| summarize messageCount = sum(messageCount), percentiles(dispatchTimeMilliseconds, 50, 90, 99, 99.9, 99.99) by operation_Name
// Visualize message distribution across partitions
traces
| where message startswith "Trigger Details: Parti"
| parse message with * "tionId: " partitionId:string ", Offset: " offsetStart:string "-" offsetEnd:string", EnqueueTimeUtc: " enqueueTimeStart:datetime "Z-" enqueueTimeEnd:datetime ", SequenceNumber: " sequenceNumberStart:string "-" sequenceNumberEnd:string ", Count: " messageCount:int
| summarize messageCount = sum(messageCount) by partitionId, bin(timestamp, 5m)
| render areachart kind=stacked
// Visualize message distribution across instances
traces
| where message startswith "Trigger Details: Parti"
| parse message with * "tionId: " partitionId:string ", Offset: " offsetStart:string "-" offsetEnd:string", EnqueueTimeUtc: " enqueueTimeStart:datetime "Z-" enqueueTimeEnd:datetime ", SequenceNumber: " sequenceNumberStart:string "-" sequenceNumberEnd:string ", Count: " messageCount:int
| summarize messageCount = sum(messageCount) by cloud_RoleInstance, bin(timestamp, 5m)
| render areachart kind=stacked
// Visualize how many of the instances allocated to the function app are executing Event Hub triggered functions
traces
| where message startswith "Trigger Details: Parti"
| summarize type = "Executing Instances", Count = dcount(cloud_RoleInstance) by bin(timestamp, 15s)
| union (
traces
| summarize type = "Allocated Instances", Count = dcount(cloud_RoleInstance) by bin(timestamp, 15s)
)
| project timestamp, type, Count
| render timechart
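// Flag batches that appear to have been dispatched more than once (see note 5 above). Event Hubs
// provides at-least-once delivery, so after a lease move or host restart the same events can be
// reprocessed. This hedged sketch only catches repeats that start at the same sequence number;
// a rewind that resumes mid-batch will not show up here.
traces
| where message startswith "Trigger Details: Parti"
| parse message with * "tionId: " partitionId:string ", Offset: " offsetStart:string "-" offsetEnd:string", EnqueueTimeUtc: " enqueueTimeStart:datetime "Z-" enqueueTimeEnd:datetime ", SequenceNumber: " sequenceNumberStart:string "-" sequenceNumberEnd:string ", Count: " messageCount:int
| summarize executions = count(), instances = make_set(cloud_RoleInstance) by partitionId, sequenceNumberStart
| where executions > 1
| order by executions desc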