info@techdevops.com | 437-991-3573 | Data Engineering Services
TechDevOps.com
Resources Tools
Experts in Microsoft SQL Server on Windows, Linux, Containers | Clusters, Always On, FCI | Migrations, Cloud, Performance



Find Long Running Queries in SQL Server, Queries with High Execution Counts, Query Cache Analysis
by BF (Principal Consultant; Architecture; Engineering)
2017-12-12








Find Long Running Queries:

Declare @Min_Duration_µs int = 1000000 --1sec

SELECT
qs.last_execution_time as 'Last_Execution_Time',
qs.execution_count as 'Execution_Count',
db_name(qp.dbid) as 'Database',
Object_Name(qp.objectid) as 'Object',
qs.query_hash,
qs.query_plan_hash,
qs.last_rows as 'Last_Rows',
qs.last_logical_reads as 'Last_Logical_Reads',
qs.last_elapsed_time as 'Last_Elapsed_Time_µs',
qs.max_worker_time as 'Max_Worker_Time_µs',
--Total CPU time that the query took to execute. Queries with a high max_worker_time (CPU time) may be highly parallel queries.
--Maximum CPU time, reported in microseconds (but only accurate to milliseconds), that this plan has ever consumed during a single execution.
qs.max_elapsed_time as 'Max_Elapsed_Time_µs',
--Total Clock time that the query took to execute. Queries with a high max_elapsed_time just take a lot of time to run. Could be slow single-threaded queries.
--Maximum elapsed time, reported in microseconds (but only accurate to milliseconds), for any completed execution of this plan.
qs.total_logical_reads as 'Total_Logical_Reads',
qs.total_elapsed_time as 'Total_Elapsed_Time',
--qs.max_dop, --SQL 2016
--qs.last_dop,--SQL 2016
st.text,
qp.query_plan,
qs.*,
qp.*,
st.*
FROM (
SELECT TOP 50 * FROM sys.dm_exec_query_stats
--Returns aggregate performance statistics for cached query plans in SQL Server. The view contains one row per query statement within the cached plan, and the
--lifetime of the rows are tied to the plan itself. When a plan is removed from the cache, the corresponding rows are eliminated from this view.
ORDER BY total_worker_time DESC
) AS qs
CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) AS st
CROSS APPLY sys.dm_exec_query_plan(qs.plan_handle) AS qp
WHERE
qs.max_worker_time > @Min_Duration_µs
OR
qs.max_elapsed_time > @Min_Duration_µs
Order by qs.execution_count desc



Find Queries with Highest Execution Counts:

SELECT top 25
qs.last_execution_time as 'Last_Execution_Time',
qs.execution_count as 'Execution_Count',
db_name(qp.dbid) as 'Database',
Object_Name(qp.objectid) as 'Object',
(qs.execution_count / DATEDIFF( second, qs.creation_time, getdate())) as 'CallsPerSec',
qs.last_logical_reads as 'Last_Logical_Reads',
qs.last_elapsed_time as 'Last_Elapsed_Time_µs',
qs.last_worker_time as 'Last_worker_Time_µs',
(qs.total_elapsed_time / qs.execution_count) as 'Avg_ElapsedTime_Per_Excecution_µs',
(qs.total_worker_time / qs.execution_count) as 'Avg_CPUTime_Per_Execution_µs',
(qs.total_logical_reads / qs.execution_count) as 'Avg_Logical_Reads_Per_Execution',
qs.last_rows as 'Last_Rows',
qs.sql_handle,
qs.query_hash,
qs.query_plan_hash,
qs.plan_handle,
qs.max_worker_time as 'Max_Worker_Time_µs',
--Total CPU time that the query took to execute. Queries with a high max_worker_time (CPU time) may be highly parallel queries.
--Maximum CPU time, reported in microseconds (but only accurate to milliseconds), that this plan has ever consumed during a single execution.
qs.max_elapsed_time as 'Max_Elapsed_Time_µs',
--Total Clock time that the query took to execute. Queries with a high max_elapsed_time just take a lot of time to run. Could be slow single-threaded queries.
--Maximum elapsed time, reported in microseconds (but only accurate to milliseconds), for any completed execution of this plan.
qs.total_logical_reads as 'Total_Logical_Reads',
qs.total_elapsed_time as 'Total_Elapsed_Time',
qs.total_worker_time as 'Total_Worker_Time',
--qs.max_dop, --SQL 2016
--qs.last_dop,--SQL 2016
st.text
FROM
sys.dm_exec_query_stats qs
CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) AS st
CROSS APPLY sys.dm_exec_query_plan(qs.plan_handle) AS qp
INNER JOIN sys.databases d ON st.dbid = d.database_id
INNER JOIN sys.dm_exec_cached_plans cp ON cp.plan_handle = qs.plan_handle
ORDER BY qs.execution_count DESC;



Query Cache Analysis:

--Query Cache Total Size:
Select sum(cast(size_in_bytes as bigint))/1024/1024/1024 as'Query Cache Size (GB)' from sys.dm_exec_cached_plans

--Query Cache Totals By ObjType:
SELECT objtype AS [ObjType (CacheType)],
SUM(CAST(size_in_bytes AS DECIMAL(18, 2))) / 1024 / 1024 AS [Total MBs],
COUNT_BIG(*) AS [Total Plans],
AVG(usecounts) AS [Avg Use Count],
SUM(CAST((CASE WHEN usecounts = 1 THEN size_in_bytes ELSE 0 END) AS DECIMAL(18, 2))) / 1024 / 1024 AS [Total MBs – usecount 1],
SUM(CASE WHEN usecounts = 1 THEN 1 ELSE 0 END) AS [Total Plans – usecount 1],
Convert(Decimal(18,1),(Convert(Decimal(18,1),SUM(CASE WHEN usecounts = 1 THEN 1 ELSE 0 END)) / COUNT_BIG(*)) * 100) as [Total Plans – usecount 1 - %]
FROM sys.dm_exec_cached_plans
GROUP BY objtype
ORDER BY [Total MBs – usecount 1] DESC
GO

--Single-use Plans - Prepared (where usecounts=1)
SELECT text, cp.objtype, cp.size_in_bytes, cp.usecounts
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) st
WHERE cp.cacheobjtype = N'Compiled Plan' AND cp.objtype IN (N'Prepared') AND cp.usecounts = 1
ORDER BY cp.size_in_bytes DESC
OPTION (RECOMPILE);
--Ex. FROM [dbo].[SEARCH] WHERE id = @ID

--Single-use Plans - AdHoc (where usecounts=1)
SELECT text, cp.objtype, cp.size_in_bytes, cp.usecounts
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) st
WHERE cp.cacheobjtype = N'Compiled Plan' AND cp.objtype IN (N'Adhoc') AND cp.usecounts = 1
ORDER BY cp.size_in_bytes DESC
OPTION (RECOMPILE);
--Ex. FROM [dbo].[SEARCH] WITH (NOLOCK) WHERE id = '09102947382017583'


--Stored Procedures:

--Top 5 Stored Procedures in Cached Plans By UseCounts
Select top 5 * from sys.dm_exec_cached_plans where cacheobjtype = N'Compiled Plan' and objtype IN (N'PROC') order by usecounts desc

--# Cached Plans for a specific Stored Procedure
SELECT plan_handle,usecounts, cacheobjtype, objtype, size_in_bytes, text, query_plan
FROM sys.dm_exec_cached_plans
CROSS APPLY sys.dm_exec_sql_text(plan_handle)
CROSS APPLY sys.dm_exec_query_plan(plan_handle)
WHERE usecounts > 1
AND text LIKE '%sprSearchListingGet%'
AND objtype = 'Proc'
ORDER BY usecounts DESC;

--All Stored Procedures
SELECT text, cp.objtype, cp.size_in_bytes, cp.usecounts, object_name(qp.objectid) as 'Object'
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) st
CROSS APPLY sys.dm_exec_query_plan (cp.plan_handle) qp
WHERE
cp.cacheobjtype = N'Compiled Plan'
AND cp.objtype IN (N'PROC')
ORDER BY cp.usecounts DESC
OPTION (RECOMPILE);


Prepared Query(Parameters or Stored Procedure) vs Ad Hoc Query:

A Prepared query is paramaterized(defined by parameters) and can be reused for a range of different inputs. (select * from t1 where id = @id;)(Or a Stored Procedure)
A prepared statement goes through a parameterization process and the parameters can be sniffed, sampled, by the optimizer.
Using sp_executesql also creates a prepared statement

An Adhoc query is hard-coded and the cached plan can only be re-used for a near identical statement. (select * from t1 where id = 123;)
Ad hoc cannot have parameters, but can have local variables (prepared statement can also have local variables).
An hoc / non-prepared query bloats the plan cache with numerous single-use queries

https://technet.microsoft.com/en-us/library/ms175528(v=sql.105).aspx


Parallelism:
For each query - better if went Parallel? Parallel Inhibitors?
Fast Queries - rather not Parallel / MAXDOP = Low; CTP = High. Long/Slow Queries - rather Parallel / MaxDop = High; CTP = Low. Need find balance and what % queries are OLTP vs DSS.
Analyze your Query Cache and find out what queries went Parallel and which ones did not.
sp_configure - the defaults are usually not ideal for most MSSQL: CTP=5 and MAXDOP = 0. Follow MS Guidelines/Formula for MAXDOP.
Get a list Parallel Query Plans and their Subtree Costs. Use this as data to analyze when setting your Cost Threshold For Parallelism

Common Strategy:
CTP:
Assumming your OLTP queries are at an average Cost/Subtree Cost of 30. This means if you set CTP to 5, some of those many go Parallel and not be ideal.
Set the CTP just above your OLTP Queries cost so those do not go Parallel. Then any DSS/Long-Running queries, which have a higher Cost/Subtree Cost around 50 as an example will go Parallell.
Example: CTP = 40: Anything with Cost/Subtree below 40 will be single-treaded. Anything with a Cost/Subtree above 40 will have the option to go Parallel.

MAXDOP:
MAXTOP - OLDP - Keep Low as if queries are OLTP/Fast you want them on a single thread/cpu.
MAXDOP - DSS - Keep High since long queries you want them going parallel / multlplie cpu / multi-threaded


--Find Queries that went Parallel:
;WITH XMLNAMESPACES
(DEFAULT 'http://schemas.microsoft.com/sqlserver/2004/07/showplan')
SELECT
TOP 50
DB_NAME(dbid) as 'Database'
, cp.usecounts as 'UseCounts'
--usecounts: Number of times the cache object has been looked up.
, cp.refcounts as 'RefCounts'
--refcounts: Number of cache objects that are referencing this cache object. Refcounts must be at least 1 for an entry to be in the cache.
, n.value('(@StatementSubTreeCost)[1]', 'VARCHAR(128)') AS StatementSubTreeCost
, query_plan AS CompleteQueryPlan
, n.value('(@StatementText)[1]', 'VARCHAR(4000)') AS StatementText
, n.value('(@StatementOptmLevel)[1]', 'VARCHAR(25)') AS StatementOptimizationLevel
, n.query('.') AS ParallelSubTreeXML
, cp.size_in_bytes
, cp.cacheobjtype
, cp.objtype
--, OBJECT_NAME(qp.objectid)
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_query_plan(plan_handle) AS qp
CROSS APPLY query_plan.nodes('/ShowPlanXML/BatchSequence/Batch/Statements/StmtSimple') AS qn(n)
WHERE n.query('.').exist('//RelOp[@PhysicalOp="Parallelism"]') = 1
Order by cp.usecounts desc;

--Find Missing Indexes:
;WITH XMLNAMESPACES(DEFAULT N'http://schemas.microsoft.com/sqlserver/2004/07/showplan')
SELECT DB_NAME(qp.dbid) as 'DB', cp.usecounts, cp.refcounts, cp.objtype, cp.cacheobjtype, st.dbid, st.text, qp.query_plan
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) AS st
CROSS APPLY sys.dm_exec_query_plan(cp.plan_handle) AS qp
WHERE qp.query_plan.exist(N'/ShowPlanXML/BatchSequence/Batch/Statements/StmtSimple/QueryPlan/MissingIndexes/MissingIndexGroup') <> 0
ORDER BY cp.usecounts DESC

--Find Queries with Implicit Conversion Warning:
;WITH XMLNAMESPACES(DEFAULT N'http://schemas.microsoft.com/sqlserver/2004/07/showplan')
SELECT
qs.query_hash, qs.query_plan_hash, st.text, Object_Name(qp.objectid),
operators.value('@ConvertIssue', 'nvarchar(250)') as 'ConvertIssue',
operators.value('@Expression', 'nvarchar(250)') as 'Expression',
qp.query_plan
FROM sys.dm_exec_query_stats qs
CROSS APPLY sys.dm_exec_query_plan(qs.plan_handle) qp
CROSS APPLY sys.dm_exec_sql_text (qs.plan_handle) st
CROSS APPLY query_plan.nodes('//Warnings/PlanAffectingConvert') rel(operators)

Search All Query Cache Relational Operators: (Table Scans, etc)
;WITH XMLNAMESPACES(DEFAULT N'http://schemas.microsoft.com/sqlserver/2004/07/showplan')
SELECT
cp.query_hash,
cp.query_plan_hash,
operators.value('@EstimatedTotalSubtreeCost','nvarchar(50)') as 'EstimatedTotalSubtreeCost',
operators.value('@PhysicalOp','nvarchar(50)') as 'PhysicalOperator',
operators.value('@LogicalOp','nvarchar(50)') as 'LogicalOperator',
operators.value('@Parallel','nvarchar(50)') as 'Parallel',
operators.value('@EstimateRows','nvarchar(50)') as 'EstimateRows',
operators.value('@AvgRowSize','nvarchar(50)') as 'AvgRowSize',
operators.value('@EstimateCPU','nvarchar(50)') as 'EstimateCPU',
operators.value('@EstimateIO','nvarchar(50)') as 'EstimateIO',
operators.value('@EstimateRebinds','nvarchar(50)') as 'EstimateRebinds',
operators.value('@EstimateRewinds','nvarchar(50)') as 'EstimateRewinds',
operators.value('@NodeId','nvarchar(50)') as 'NodeId',
qp.query_plan
FROM sys.dm_exec_query_stats cp
CROSS APPLY sys.dm_exec_query_plan(cp.plan_handle) qp
CROSS APPLY query_plan.nodes('//RelOp') rel(operators)
Where cp.query_hash = 0x23B13B3917FBC89C;
--Where operators.value('@PhysicalOp','nvarchar(50)') in ('Table Scan')

Query Plan XML: (StmtSimple, QueryPlan, RelOp)
The XML provides all the information available about the query plan in a single document. You can also reference the schema that the XML is written against by going to http://schemas.microsoft.com/sqlserver/2004/07/showplan.
<StmtSimple> specifies the statement text, SET options and other details about the statement. Within this <StmtSimple> element, is the <QueryPlan> element, which is the main part of the execution plan.
The <QueryPlan> element contains <RelOp> elements. Each <RelOp> element represents one of the Plan’s Operations, following the same hierarchical structure as the graphical execution plan.

http://schemas.microsoft.com/sqlserver/2004/07/showplan/

Example:
<StmtSimple>
<QueryPlan>
<RelOp PhysicalOp="Merge Join" NodeId="0">
<RelOp PhysicalOp="Cluster Index Scan" NodeId=1">
</RelOp PhysicalOp="Cluster Index Scan" NodeId=1">
<RelOp PhysicalOp="Compute Scalar" NodeId=2">
<RelOp PhysicalOp="Compute Scalar" NodeId=3">
<RelOp PhysicalOp="Compute Scalar" NodeId=4">
</RelOp PhysicalOp="Compute Scalar" NodeId=4">
</RelOp PhysicalOp="Compute Scalar" NodeId=3">
</RelOp PhysicalOp="Compute Scalar" NodeId=2">
</RelOp PhysicalOp="Merge Join" NodeId="0">
</QueryPlan>
</StmtSimple>

Each element provides complete details of the Operation and includes information such as defined values, output list, estimated I/O, estimated CPU, etc.

Find Queries With Estimated Rows vary from Last_Rows:
Detecting Cardinality Estimate Issues
Possible Parameter Sniffing Issue(Check Execution Plam XML..ParameterCompiledValue - Compare Parameter Compiled Vaue vs Passed in value and the Data Distribution/Records returned for both)

Declare @EstimatedRowsDiffValue int = 25000;

SELECT TOP 25 * FROM
(
SELECT
st.text as 'Query_text',
SUBSTRING(st.text, statement_start_offset/2+1,((case when statement_end_offset = -1 then datalength(text) else statement_end_offset end - statement_start_offset)/2) + 1) as 'Stmt_text',
CAST(p.query_plan AS XML) AS 'query_plan',
qs.last_execution_time,
qs.last_elapsed_time,
CAST(p.query_plan AS XML).value('(//@EstimateRows)[1]', 'VARCHAR(128)') AS 'estimated_rows',
qs.last_rows
FROM sys.dm_exec_query_stats qs
CROSS APPLY sys.dm_exec_sql_text (sql_handle) st
CROSS APPLY sys.dm_exec_text_query_plan (plan_handle,statement_start_offset,statement_end_offset) p
) AS t1
WHERE TRY_CONVERT(DECIMAL,t1.estimated_rows) IS NOT NULL
AND CONVERT(DECIMAL,t1.estimated_rows) > last_rows
AND ABS(CONVERT(DECIMAL,t1.estimated_rows) - last_rows) > @EstimatedRowsDiffValue
ORDER BY ABS(CONVERT(DECIMAL,t1.estimated_rows) - last_rows) DESC

Plan Cache Internals:

sys.dm_os_memory_cache_counters:
Returns a snapshot of the health of a cache in SQL Server. sys.dm_os_memory_cache_counters provides run-time information about the cache
entries allocated, their use, and the source of memory for the cache entries.

Cache Stores - SQL Server’s plan cache is made up of four separate memory areas, called cache stores.

Select * from sys.dm_os_memory_cache_counters where [type] in ('CACHESTORE_OBJCP', 'CACHESTORE_SQLCP', 'CACHESTORE_PHDR', 'CACHESTORE_XPROC') order by pages_kb desc

Select sum(pages_kb)/1024/1024 as 'pages_kb total size (GB)' from sys.dm_os_memory_cache_counters where [type] in ('CACHESTORE_OBJCP', 'CACHESTORE_SQLCP', 'CACHESTORE_PHDR', 'CACHESTORE_XPROC')

Object Plans (CACHESTORE_OBJCP)
Object Plans include plans for stored procedures, functions, and triggers
SQL Plans (CACHESTORE_SQLCP)
SQL Plans include the plans for adhoc cached plans, autoparameterized plans, and prepared plans.
Bound Trees (CACHESTORE_PHDR)
Bound Trees are the structures produced by SQL Server’s algebrizer for views, constraints, and defaults.
Extended Stored Procedures (CACHESTORE_XPROC)
Extended Procs (Xprocs) are predefined system procedures, like sp_executeSql and sp_tracecreate, that are defined using a DLL, not using Transact-SQL statements. The cached structure contains only the function name and the DLL name in which the procedure is implemented.