/p_queryProgress.sql

## p_queryProgress.sql
/* Create a dummy version of the procedure (so that we can use ALTER PROCEDURE below) */
IF object_id('dbo.p_queryProgress') IS NULL
BEGIN
	EXEC('CREATE PROCEDURE dbo.p_queryProgress AS BEGIN RETURN END')
END
GO

/* GP 11/6/2015 Add a stored procedure to view live query progress on SQL 2014+
	Note that only queries for which the actual execution plan is being captured will appear.
	The easiest way to do this is to run the query in SSMS with "Include Actual Execution Plan" on.

	Another way is to run an extended event session with the sqlserver.query_post_execution_showplan
	event on, but be careful about this option because it does add significant overhead.
	See http://sqlperformance.com/2013/03/sql-plan/showplan-impact for details on the overhead.

		Example usages:
  			EXEC p_queryProgress
			EXEC p_queryProgress @threadDetail = 1 -- Add pivoted columns showing the distribution of rows across each thread
			EXEC p_queryProgress @threadDetail = 2 -- Expand the result set to have a separate row for each thread
 			EXEC p_queryProgress @session_id = 100 -- Filter results to a single session
			EXEC p_queryProgress @node_id = 7 -- Filter results to a single node in the query plan (automatically expands result set to have a separate row for each thread)
*/
ALTER PROCEDURE dbo.p_queryProgress
	@threadDetail TINYINT = 0, /* 0, 1, or 2; see example usages */
	@session_id INT = NULL,
	@node_id INT = NULL
AS
BEGIN
	/**********************************************************************
	 Input validation and processing
	**********************************************************************/

	-- Confirm SQL 2014+
	IF OBJECT_ID('sys.dm_exec_query_profiles') IS NULL
	BEGIN
		RAISERROR('p_queryProgress can only be run on a SQL 2014+ server.', 16, 1)
		RETURN
	END

	-- If viewing results for just a single node, automatically maximize thread detail
	IF @node_id IS NOT NULL
		SET @threadDetail = 2

	/**********************************************************************
	 Raw data collection
	**********************************************************************/

	-- Capture all the raw data by thread from sys.dm_exec_query_profiles
	-- (See https://msdn.microsoft.com/en-us/library/dn223301.aspx for documentation)
	-- GP 4/6/2016 APT-84606 We now capture database_id/object_id, which we'll use to update object_name
	SELECT session_id, node_id, physical_operator_name, thread_id, row_count, estimate_row_count, elapsed_time_ms, cpu_time_ms,
			database_id, object_id, CAST(NULL AS VARCHAR(256)) AS object_name,
			scan_count, logical_read_count, physical_read_count, read_ahead_count, write_page_count, segment_read_count, segment_skip_count
	INTO #tempQueryProgress
	FROM sys.dm_exec_query_profiles WITH (NOLOCK)
	WHERE session_id <> @@SPID /* Avoid the session running p_queryProgress! */
		AND (@session_id IS NULL OR session_id = @session_id) /* If requested, filter to a single session */
		AND (@node_id IS NULL OR node_id = @node_id) /* If requested, filter to a single node in the query plan */
	-- Key the raw data
	ALTER TABLE #tempQueryProgress ADD UNIQUE CLUSTERED (session_id, node_id, thread_id)

	/**********************************************************************
	 Collect object names for any seeks/scans
	**********************************************************************/

	-- Populate object_name from object_id/database_id in a manner that is not susceptible to being blocked by a SCH-M lock.
	-- Unfortunately, SQL Server does not provide support for invoking OBJECT_NAME() using the READ UNCOMMITTED isolation level.
	-- Therefore, the otherwise-very-convenient OBJECT_NAME(object_id, database_id) API cannot be used because it will cause
	-- p_queryProgress to block indefinitely if any of the ongoing queries access an object that has been modified (SCH-M lock) during the transaction.
	-- See http://connect.microsoft.com/SQL/feedback/ViewFeedback.aspx?FeedbackID=432497 for a relevant enhancement request.
	SELECT DISTINCT ISNULL(database_id, 0) AS database_id, ISNULL(object_id, 0) AS object_id, CAST(NULL AS VARCHAR(256)) AS object_name
	INTO #distinctObjects
	FROM #tempQueryProgress
	-- Filter out any 0 values (e.g., for a spool, which uses 0 as a dummy value) as well as any NULL values
	WHERE database_id <> 0
		AND object_id <> 0
	ALTER TABLE #distinctObjects ADD PRIMARY KEY (database_id, object_id)

	-- Compute object_name from system catalog metadata so that NOLOCK can be used to prevent p_queryProgress from blocking
	-- if any of the ongoing queries are accessing an object that has been modified (SCH-M lock) during the transaction.
	DECLARE @dbId INT, @dbName VARCHAR(255), @sqlString VARCHAR(MAX)
	DECLARE p_queryProgress_dbNameCursor CURSOR LOCAL FAST_FORWARD FOR
	-- Set up a cursor over each distinct database so that we can target the system catalog tables on that database
	SELECT DISTINCT d.database_id, d.name
	FROM #distinctObjects o
	JOIN master.sys.databases d WITH (NOLOCK)
		ON d.database_id = o.database_id

	OPEN p_queryProgress_dbNameCursor
	FETCH NEXT FROM p_queryProgress_dbNameCursor INTO @dbId, @dbName

	WHILE @@FETCH_STATUS = 0
	BEGIN
		-- Capture the full object name for all objects on a particular database that are accessed by one or more scans/seeks in the current p_queryProgress results
		SET @sqlString = '
			UPDATE do
			SET do.object_name = ''' + @dbName + '.'' + s.name + ''.'' + o.name
			FROM #distinctObjects do
			JOIN ' + @dbName + '.sys.objects o WITH (NOLOCK)
				ON o.object_id = do.object_id
			JOIN ' + @dbName + '.sys.schemas s WITH (NOLOCK)
				ON s.schema_id = o.schema_id
			WHERE do.database_id = ' + CONVERT(VARCHAR(MAX), @dbId) + '
		'
		EXEC(@sqlString)

		FETCH NEXT FROM p_queryProgress_dbNameCursor INTO @dbId, @dbName
	END

	CLOSE p_queryProgress_dbNameCursor
	DEALLOCATE p_queryProgress_dbNameCursor

	-- Now that we have all the distinct object_names, merge this information into the raw data we collected
	UPDATE p
	SET p.object_name = do.object_name
	FROM #tempQueryProgress p
	JOIN #distinctObjects do
		ON do.database_id = p.database_id
		AND do.object_id = p.object_id

	/**********************************************************************
	 Build the final result set to display
	**********************************************************************/

	-- The default parameters, which include aggregating data across threads (to produce one row per query plan node)
	-- and including a very basic amount of thread-level information (how many threads, how many processed 1+ rows, max % of all rows processed by any single thread)
	DECLARE @threadDetailSelect VARCHAR(MAX) = '
				, ISNULL(NULLIF(COUNT(CASE WHEN thread_id > 0 THEN 1 END), 0), 1) AS threads /* For nodes executing in parallel, only count parallel threads. For serial nodes (only thread_id 0), return 1. */
				, COUNT(CASE WHEN row_count > 0 THEN 1 END) AS active_threads
				, FORMAT(1.0*MAX(row_count)/NULLIF(SUM(row_count),0), ''#,##0.00%'') AS [Tmax_%]'
	DECLARE @groupBy VARCHAR(MAX) = 'session_id, node_id, physical_operator_name'
	DECLARE @orderBy VARCHAR(MAX) = 'session_id, node_id'

	IF @threadDetail = 1 /* Add pivoted columns showing the % of rows that were processed by each thread */
	BEGIN
		SELECT @threadDetailSelect = @threadDetailSelect + '
				, FORMAT(1.0*SUM(CASE WHEN thread_id = ' + CAST(thread_id AS VARCHAR(MAX)) + ' THEN row_count END)/NULLIF(SUM(row_count),0), ''#,##0.00%'') AS [T' + CAST(thread_id AS VARCHAR(MAX)) + '_%]'
		FROM (
			SELECT DISTINCT thread_id FROM #tempQueryProgress WHERE thread_id > 0
		) t
		-- Add columns in order by thread_id; technically undocumented SQL functionality, but we can do this easily by appending
		-- to a variable in the SELECT statement and ensuring that the rows are processed serially and in the order we want.
		ORDER BY t.thread_id
		OPTION (MAXDOP 1)
	END
	ELSE IF @threadDetail = 2 /* Expand the result set to contain a row for each thread */
	BEGIN
		SELECT @threadDetailSelect = ''
		-- It's not 100% clear whether callers will prefer thread_id to come before or after node_id in the ordering of rows.
		-- For now, we've elected before since other use cases like not breaking out by thread or filtering to a specific node
		-- likely already cover use cases in which it might be preferential to have thread_id after node_id in the sort order.
		SELECT @groupBy = REPLACE(@groupBy, 'session_id', 'session_id, thread_id')
		SELECT @orderBy = REPLACE(@orderBy, 'session_id', 'session_id, thread_id')
	END

	-- Build and execute the SQL statement that will present the results to the user
	DECLARE @queryProgressResultsSql VARCHAR(MAX) = '
		SELECT ' + @groupBy + ', MIN(object_name) AS object_name, SUM(row_count) rows, SUM(estimate_row_count) AS estimate_rows,
			FORMAT(1.0*SUM(row_count)/NULLIF(SUM(estimate_row_count),0), ''#,##0.00%'') AS [%_complete],
			SUM(elapsed_time_ms) AS elapsed_ms,
			SUM(cpu_time_ms) AS cpu_ms,
			SUM(scan_count) AS scans,
			SUM(logical_read_count) AS logical_reads,
			SUM(physical_read_count) AS physical_reads,
			SUM(read_ahead_count) AS read_aheads,
			SUM(write_page_count) AS spill_pages,
			SUM(segment_read_count) AS segment_reads,
			SUM(segment_skip_count) AS segment_skips'
			+ @threadDetailSelect + '
		FROM #tempQueryProgress
		GROUP BY ' + @groupBy + '
		ORDER BY ' + @orderBy + '
	'
	EXEC(@queryProgressResultsSql)
END
GO
	/* Create a dummy version of the procedure (so that we can use ALTER PROCEDURE below) */
	IF object_id('dbo.p_queryProgress') IS NULL
	BEGIN
	EXEC('CREATE PROCEDURE dbo.p_queryProgress AS BEGIN RETURN END')
	END
	GO

	/* GP 11/6/2015 Add a stored procedure to view live query progress on SQL 2014+
	Note that only queries for which the actual execution plan is being captured will appear.
	The easiest way to do this is to run the query in SSMS with "Include Actual Execution Plan" on.

	Another way is to run an extended event session with the sqlserver.query_post_execution_showplan
	event on, but be careful about this option because it does add significant overhead.
	See http://sqlperformance.com/2013/03/sql-plan/showplan-impact for details on the overhead.

	Example usages:
	EXEC p_queryProgress
	EXEC p_queryProgress @threadDetail = 1 -- Add pivoted columns showing the distribution of rows across each thread
	EXEC p_queryProgress @threadDetail = 2 -- Expand the result set to have a separate row for each thread
	EXEC p_queryProgress @session_id = 100 -- Filter results to a single session
	EXEC p_queryProgress @node_id = 7 -- Filter results to a single node in the query plan (automatically expands result set to have a separate row for each thread)
	*/
	ALTER PROCEDURE dbo.p_queryProgress
	@threadDetail TINYINT = 0, /* 0, 1, or 2; see example usages */
	@session_id INT = NULL,
	@node_id INT = NULL
	AS
	BEGIN
	/**********************************************************************
	Input validation and processing
	**********************************************************************/

	-- Confirm SQL 2014+
	IF OBJECT_ID('sys.dm_exec_query_profiles') IS NULL
	BEGIN
	RAISERROR('p_queryProgress can only be run on a SQL 2014+ server.', 16, 1)
	RETURN
	END

	-- If viewing results for just a single node, automatically maximize thread detail
	IF @node_id IS NOT NULL
	SET @threadDetail = 2

	/**********************************************************************
	Raw data collection
	**********************************************************************/

	-- Capture all the raw data by thread from sys.dm_exec_query_profiles
	-- (See https://msdn.microsoft.com/en-us/library/dn223301.aspx for documentation)
	-- GP 4/6/2016 APT-84606 We now capture database_id/object_id, which we'll use to update object_name
	SELECT session_id, node_id, physical_operator_name, thread_id, row_count, estimate_row_count, elapsed_time_ms, cpu_time_ms,
	database_id, object_id, CAST(NULL AS VARCHAR(256)) AS object_name,
	scan_count, logical_read_count, physical_read_count, read_ahead_count, write_page_count, segment_read_count, segment_skip_count
	INTO #tempQueryProgress
	FROM sys.dm_exec_query_profiles WITH (NOLOCK)
	WHERE session_id <> @@SPID /* Avoid the session running p_queryProgress! */
	AND (@session_id IS NULL OR session_id = @session_id) /* If requested, filter to a single session */
	AND (@node_id IS NULL OR node_id = @node_id) /* If requested, filter to a single node in the query plan */
	-- Key the raw data
	ALTER TABLE #tempQueryProgress ADD UNIQUE CLUSTERED (session_id, node_id, thread_id)

	/**********************************************************************
	Collect object names for any seeks/scans
	**********************************************************************/

	-- Populate object_name from object_id/database_id in a manner that is not susceptible to being blocked by a SCH-M lock.
	-- Unfortunately, SQL Server does not provide support for invoking OBJECT_NAME() using the READ UNCOMMITTED isolation level.
	-- Therefore, the otherwise-very-convenient OBJECT_NAME(object_id, database_id) API cannot be used because it will cause
	-- p_queryProgress to block indefinitely if any of the ongoing queries access an object that has been modified (SCH-M lock) during the transaction.
	-- See http://connect.microsoft.com/SQL/feedback/ViewFeedback.aspx?FeedbackID=432497 for a relevant enhancement request.
	SELECT DISTINCT ISNULL(database_id, 0) AS database_id, ISNULL(object_id, 0) AS object_id, CAST(NULL AS VARCHAR(256)) AS object_name
	INTO #distinctObjects
	FROM #tempQueryProgress
	-- Filter out any 0 values (e.g., for a spool, which uses 0 as a dummy value) as well as any NULL values
	WHERE database_id <> 0
	AND object_id <> 0
	ALTER TABLE #distinctObjects ADD PRIMARY KEY (database_id, object_id)

	-- Compute object_name from system catalog metadata so that NOLOCK can be used to prevent p_queryProgress from blocking
	-- if any of the ongoing queries are accessing an object that has been modified (SCH-M lock) during the transaction.
	DECLARE @dbId INT, @dbName VARCHAR(255), @sqlString VARCHAR(MAX)
	DECLARE p_queryProgress_dbNameCursor CURSOR LOCAL FAST_FORWARD FOR
	-- Set up a cursor over each distinct database so that we can target the system catalog tables on that database
	SELECT DISTINCT d.database_id, d.name
	FROM #distinctObjects o
	JOIN master.sys.databases d WITH (NOLOCK)
	ON d.database_id = o.database_id

	OPEN p_queryProgress_dbNameCursor
	FETCH NEXT FROM p_queryProgress_dbNameCursor INTO @dbId, @dbName

	WHILE @@FETCH_STATUS = 0
	BEGIN
	-- Capture the full object name for all objects on a particular database that are accessed by one or more scans/seeks in the current p_queryProgress results
	SET @sqlString = '
	UPDATE do
	SET do.object_name = ''' + @dbName + '.'' + s.name + ''.'' + o.name
	FROM #distinctObjects do
	JOIN ' + @dbName + '.sys.objects o WITH (NOLOCK)
	ON o.object_id = do.object_id
	JOIN ' + @dbName + '.sys.schemas s WITH (NOLOCK)
	ON s.schema_id = o.schema_id
	WHERE do.database_id = ' + CONVERT(VARCHAR(MAX), @dbId) + '
	'
	EXEC(@sqlString)

	FETCH NEXT FROM p_queryProgress_dbNameCursor INTO @dbId, @dbName
	END

	CLOSE p_queryProgress_dbNameCursor
	DEALLOCATE p_queryProgress_dbNameCursor

	-- Now that we have all the distinct object_names, merge this information into the raw data we collected
	UPDATE p
	SET p.object_name = do.object_name
	FROM #tempQueryProgress p
	JOIN #distinctObjects do
	ON do.database_id = p.database_id
	AND do.object_id = p.object_id

	/**********************************************************************
	Build the final result set to display
	**********************************************************************/

	-- The default parameters, which include aggregating data across threads (to produce one row per query plan node)
	-- and including a very basic amount of thread-level information (how many threads, how many processed 1+ rows, max % of all rows processed by any single thread)
	DECLARE @threadDetailSelect VARCHAR(MAX) = '
	, ISNULL(NULLIF(COUNT(CASE WHEN thread_id > 0 THEN 1 END), 0), 1) AS threads /* For nodes executing in parallel, only count parallel threads. For serial nodes (only thread_id 0), return 1. */
	, COUNT(CASE WHEN row_count > 0 THEN 1 END) AS active_threads
	, FORMAT(1.0*MAX(row_count)/NULLIF(SUM(row_count),0), ''#,##0.00%'') AS [Tmax_%]'
	DECLARE @groupBy VARCHAR(MAX) = 'session_id, node_id, physical_operator_name'
	DECLARE @orderBy VARCHAR(MAX) = 'session_id, node_id'

	IF @threadDetail = 1 /* Add pivoted columns showing the % of rows that were processed by each thread */
	BEGIN
	SELECT @threadDetailSelect = @threadDetailSelect + '
	, FORMAT(1.0*SUM(CASE WHEN thread_id = ' + CAST(thread_id AS VARCHAR(MAX)) + ' THEN row_count END)/NULLIF(SUM(row_count),0), ''#,##0.00%'') AS [T' + CAST(thread_id AS VARCHAR(MAX)) + '_%]'
	FROM (
	SELECT DISTINCT thread_id FROM #tempQueryProgress WHERE thread_id > 0
	) t
	-- Add columns in order by thread_id; technically undocumented SQL functionality, but we can do this easily by appending
	-- to a variable in the SELECT statement and ensuring that the rows are processed serially and in the order we want.
	ORDER BY t.thread_id
	OPTION (MAXDOP 1)
	END
	ELSE IF @threadDetail = 2 /* Expand the result set to contain a row for each thread */
	BEGIN
	SELECT @threadDetailSelect = ''
	-- It's not 100% clear whether callers will prefer thread_id to come before or after node_id in the ordering of rows.
	-- For now, we've elected before since other use cases like not breaking out by thread or filtering to a specific node
	-- likely already cover use cases in which it might be preferential to have thread_id after node_id in the sort order.
	SELECT @groupBy = REPLACE(@groupBy, 'session_id', 'session_id, thread_id')
	SELECT @orderBy = REPLACE(@orderBy, 'session_id', 'session_id, thread_id')
	END

	-- Build and execute the SQL statement that will present the results to the user
	DECLARE @queryProgressResultsSql VARCHAR(MAX) = '
	SELECT ' + @groupBy + ', MIN(object_name) AS object_name, SUM(row_count) rows, SUM(estimate_row_count) AS estimate_rows,
	FORMAT(1.0*SUM(row_count)/NULLIF(SUM(estimate_row_count),0), ''#,##0.00%'') AS [%_complete],
	SUM(elapsed_time_ms) AS elapsed_ms,
	SUM(cpu_time_ms) AS cpu_ms,
	SUM(scan_count) AS scans,
	SUM(logical_read_count) AS logical_reads,
	SUM(physical_read_count) AS physical_reads,
	SUM(read_ahead_count) AS read_aheads,
	SUM(write_page_count) AS spill_pages,
	SUM(segment_read_count) AS segment_reads,
	SUM(segment_skip_count) AS segment_skips'
	+ @threadDetailSelect + '
	FROM #tempQueryProgress
	GROUP BY ' + @groupBy + '
	ORDER BY ' + @orderBy + '
	'
	EXEC(@queryProgressResultsSql)
	END
	GO