From f4df0b2143db387a4b3d1b4adda9c3f1bb72ed88 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Thu, 9 Apr 2026 10:02:29 -0400 Subject: [PATCH] Address issue #178 round 3 feedback (items 17-25) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Address issue #178 round 3 feedback (items 17-22) - #17: Bump SubTreeCost threshold from 0.01 to 1.0 (Rules 3 & 20) — CTFP is an integer - #18: Smarter MaxDOPSetToOne severity — check query text for MAXDOP 1, detect truncation - #19: Already fixed by #20 - #20: Scope "allocated resources" message to Hash/Sort/Spool operators only - #21: Fix non-SARGable false positive when function is on parameter side - #22: Enrich parallelism warnings with targeted wait stats advice - #23: Enrich scan-with-predicate with cost %, elapsed %, row selectivity; elevate to Critical Co-Authored-By: Claude Opus 4.6 (1M context) * Fix parser skipping statements after DECLARE in multi-statement batches A Batch element can contain multiple children (e.g., one for DECLARE and one for the SELECT). The parser used .Element() which only reads the first, causing "no plan loaded" when the actual query plan was in the second block. Changed to .Elements() to iterate all of them. Reported by Joe Obbish via email (related to issue #178). Co-Authored-By: Claude Opus 4.6 (1M context) * Show DECLARE/ASSIGN statements in plan viewer with proper icon Statements with no QueryPlan (like DECLARE/ASSIGN) were filtered from the statement tab list because they had no RootNode. Now creates a synthetic root node with the appropriate icon (assign/declare) so they appear as tabs alongside the actual query plans. Co-Authored-By: Claude Opus 4.6 (1M context) * Add Rules 32 and 33: scan cardinality misestimate and CE guess detection Rule 32 (actual plans): When an expensive scan (>= 50% of plan) has a >= 10x row overestimate and < 10% selectivity, flag that the bad estimate likely caused the optimizer to choose a scan over a seek. Rule 33 (estimated plans): Detect well-known CE default selectivity guesses (30%, 10%, 9%, ~16.4%, 1%) on expensive scans against large tables (>= 100K rows). These patterns suggest the optimizer is using a default guess instead of accurate statistics. Addresses issue #178 items #24 and #25 (Joe Obbish feedback). Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- src/PlanViewer.Core/Services/PlanAnalyzer.cs | 297 ++++++++++++++++-- .../Services/ShowPlanParser.cs | 27 +- 2 files changed, 303 insertions(+), 21 deletions(-) diff --git a/src/PlanViewer.Core/Services/PlanAnalyzer.cs b/src/PlanViewer.Core/Services/PlanAnalyzer.cs index c2b2fe6..f2e052d 100644 --- a/src/PlanViewer.Core/Services/PlanAnalyzer.cs +++ b/src/PlanViewer.Core/Services/PlanAnalyzer.cs @@ -62,7 +62,9 @@ public static void Analyze(ParsedPlan plan, AnalyzerConfig? config = null) [24] = "Top Above Scan", [25] = "Ineffective Parallelism", [26] = "Row Goal", [27] = "Optimize For Unknown", [28] = "NOT IN with Nullable Column", [29] = "Implicit Conversion", [30] = "Wide Index Suggestion", - [31] = "Parallel Wait Bottleneck" + [31] = "Parallel Wait Bottleneck", + [32] = "Scan Cardinality Misestimate", + [33] = "Estimated Plan CE Guess" }; // Reverse lookup: WarningType → rule number @@ -124,10 +126,11 @@ private static void TryOverrideSeverity(PlanWarning warning, AnalyzerConfig cfg) private static void AnalyzeStatement(PlanStatement stmt, AnalyzerConfig cfg) { // Rule 3: Serial plan with reason - // Skip: trivial cost (< 0.01), TRIVIAL optimization (can't go parallel anyway), + // Skip: cost < 1 (CTFP is an integer so cost < 1 can never go parallel), + // TRIVIAL optimization (can't go parallel anyway), // and 0ms actual elapsed time (not worth flagging). if (!cfg.IsRuleDisabled(3) && !string.IsNullOrEmpty(stmt.NonParallelPlanReason) - && stmt.StatementSubTreeCost >= 0.01 + && stmt.StatementSubTreeCost >= 1.0 && stmt.StatementOptmLevel != "TRIVIAL" && !(stmt.QueryTimeStats != null && stmt.QueryTimeStats.ElapsedTimeMs == 0)) { @@ -194,12 +197,46 @@ private static void AnalyzeStatement(PlanStatement stmt, AnalyzerConfig cfg) or "NoParallelWithRemoteQuery" or "NoRemoteParallelismForMatrix"; - stmt.PlanWarnings.Add(new PlanWarning + // MaxDOPSetToOne needs special handling: check whether the user explicitly + // set MAXDOP 1 in the query text, or if it's a server/db/RG setting. + // SQL Server truncates StatementText at ~4,000 characters in plan XML. + if (stmt.NonParallelPlanReason == "MaxDOPSetToOne") { - WarningType = "Serial Plan", - Message = $"Query running serially: {reason}.", - Severity = isActionable ? PlanWarningSeverity.Warning : PlanWarningSeverity.Info - }); + var text = stmt.StatementText ?? ""; + var hasMaxdop1InText = Regex.IsMatch(text, @"MAXDOP\s+1\b", RegexOptions.IgnoreCase); + var isTruncated = text.Length >= 3990; + + if (hasMaxdop1InText) + { + // User explicitly set MAXDOP 1 in the query — warn + stmt.PlanWarnings.Add(new PlanWarning + { + WarningType = "Serial Plan", + Message = $"Query running serially: {reason}.", + Severity = PlanWarningSeverity.Warning + }); + } + else if (isTruncated) + { + // Query text was truncated — can't tell if MAXDOP 1 is in the query + stmt.PlanWarnings.Add(new PlanWarning + { + WarningType = "Serial Plan", + Message = $"Query running serially: {reason}. MAXDOP 1 may be set at the server, database, resource governor, or query level (query text was truncated).", + Severity = PlanWarningSeverity.Info + }); + } + // else: not truncated, no MAXDOP 1 in text — server/db/RG setting, suppress entirely + } + else + { + stmt.PlanWarnings.Add(new PlanWarning + { + WarningType = "Serial Plan", + Message = $"Query running serially: {reason}.", + Severity = isActionable ? PlanWarningSeverity.Warning : PlanWarningSeverity.Info + }); + } } // Rule 9: Memory grant issues (statement-level) @@ -292,8 +329,8 @@ private static void AnalyzeStatement(PlanStatement stmt, AnalyzerConfig cfg) // Rule 20: Local variables without RECOMPILE // Parameters with no CompiledValue are likely local variables — the optimizer // cannot sniff their values and uses density-based ("unknown") estimates. - // Skip trivial statements (simple variable assignments) where estimate quality doesn't matter. - if (!cfg.IsRuleDisabled(20) && stmt.Parameters.Count > 0 && stmt.StatementSubTreeCost >= 0.01) + // Skip statements with cost < 1 (can't go parallel, estimate quality rarely matters). + if (!cfg.IsRuleDisabled(20) && stmt.Parameters.Count > 0 && stmt.StatementSubTreeCost >= 1.0) { var unsnifffedParams = stmt.Parameters .Where(p => string.IsNullOrEmpty(p.CompiledValue)) @@ -348,28 +385,33 @@ private static void AnalyzeStatement(PlanStatement stmt, AnalyzerConfig cfg) var speedup = (double)cpu / elapsed; var efficiency = Math.Max(0.0, Math.Min(100.0, (speedup - 1.0) / (dop - 1.0) * 100.0)); + // Build targeted advice from wait stats if available + var waitAdvice = GetWaitStatsAdvice(stmt.WaitStats); + if (speedup < 0.5 && !cfg.IsRuleDisabled(31)) { // CPU well below Elapsed: threads are waiting, not doing CPU work var waitPct = (1.0 - speedup) * 100; + var advice = waitAdvice ?? "Common causes include spills to tempdb, physical I/O reads, lock or latch contention, and memory grant waits."; stmt.PlanWarnings.Add(new PlanWarning { WarningType = "Parallel Wait Bottleneck", Message = $"Parallel plan (DOP {dop}, {efficiency:N0}% efficient) with elapsed time ({elapsed:N0}ms) exceeding CPU time ({cpu:N0}ms). " + $"Approximately {waitPct:N0}% of elapsed time was spent waiting rather than on CPU. " + - $"Common causes include spills to tempdb, physical I/O reads, lock or latch contention, and memory grant waits.", + advice, Severity = PlanWarningSeverity.Warning }); } else if (efficiency < 40) { // CPU >= Elapsed but well below DOP potential — parallelism is ineffective + var advice = waitAdvice ?? "Look for parallel thread skew, blocking exchanges, or serial zones in the plan that prevent effective parallel execution."; stmt.PlanWarnings.Add(new PlanWarning { WarningType = "Ineffective Parallelism", Message = $"Parallel plan (DOP {dop}) is only {efficiency:N0}% efficient — CPU time ({cpu:N0}ms) vs elapsed time ({elapsed:N0}ms). " + $"At DOP {dop}, ideal CPU time would be ~{elapsed * dop:N0}ms. " + - $"Look for parallel thread skew, blocking exchanges, or serial zones in the plan that prevent effective parallel execution.", + advice, Severity = efficiency < 20 ? PlanWarningSeverity.Critical : PlanWarningSeverity.Warning }); } @@ -575,8 +617,11 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi { if (node.ActualRows == 0) { - // Zero rows is always worth noting — resources were allocated for nothing - if (node.EstimateRows >= 100) + // Zero rows with a significant estimate — only warn on operators that + // actually allocate meaningful resources (memory grants for hash/sort/spool). + // Skip Parallelism, Bitmap, Compute Scalar, Filter, Concatenation, etc. + // where 0 rows is just a consequence of upstream filtering. + if (node.EstimateRows >= 100 && AllocatesResources(node)) { node.Warnings.Add(new PlanWarning { @@ -777,14 +822,80 @@ _ when nonSargableReason.StartsWith("Function call") => !IsProbeOnly(node.Predicate)) { var displayPredicate = StripProbeExpressions(node.Predicate); + var details = BuildScanImpactDetails(node, stmt); + var severity = PlanWarningSeverity.Warning; + + // Elevate to Critical if the scan dominates the plan + if (details.CostPct >= 90 || details.ElapsedPct >= 90) + severity = PlanWarningSeverity.Critical; + + var message = "Scan with residual predicate — SQL Server is reading every row and filtering after the fact."; + if (!string.IsNullOrEmpty(details.Summary)) + message += $" {details.Summary}"; + message += " Check that you have appropriate indexes."; + message += $"\nPredicate: {Truncate(displayPredicate, 200)}"; + node.Warnings.Add(new PlanWarning { WarningType = "Scan With Predicate", - Message = $"Scan with residual predicate — SQL Server is reading every row and filtering after the fact. Check that you have appropriate indexes.\nPredicate: {Truncate(displayPredicate, 200)}", - Severity = PlanWarningSeverity.Warning + Message = message, + Severity = severity }); } + // Rule 32: Cardinality misestimate on expensive scan — likely preventing index usage + // When a scan dominates the plan AND the estimate is vastly higher than actual rows, + // the optimizer chose a scan because it thought it needed most of the table. + // With accurate estimates, it would likely seek instead. + if (!cfg.IsRuleDisabled(32) && node.HasActualStats && IsRowstoreScan(node) + && node.EstimateRows > 0 && node.ActualRows >= 0 && node.ActualRowsRead > 0) + { + var impact = BuildScanImpactDetails(node, stmt); + var overestimateRatio = node.EstimateRows / Math.Max(1.0, node.ActualRows); + var selectivity = (double)node.ActualRows / node.ActualRowsRead; + + // Fire when: scan is >= 50% of plan, estimate is >= 10x actual, and < 10% selectivity + if ((impact.CostPct >= 50 || impact.ElapsedPct >= 50) + && overestimateRatio >= 10.0 + && selectivity < 0.10) + { + node.Warnings.Add(new PlanWarning + { + WarningType = "Scan Cardinality Misestimate", + Message = $"Estimated {node.EstimateRows:N0} rows but only {node.ActualRows:N0} returned ({selectivity * 100:N3}% of {node.ActualRowsRead:N0} rows read). " + + $"The {overestimateRatio:N0}x overestimate likely caused the optimizer to choose a scan instead of a seek. " + + $"An index on the predicate columns could dramatically reduce I/O.", + Severity = PlanWarningSeverity.Critical + }); + } + } + + // Rule 33: Estimated plan CE guess detection — scans with telltale default selectivity + // When the optimizer uses a local variable or can't sniff, it falls back to density-based + // guesses: 30% (equality), 10% (inequality), 9% (LIKE/between), ~16.43% (sqrt(30%)), + // 1% (multi-inequality). On large tables, these guesses can hide the need for an index. + if (!cfg.IsRuleDisabled(33) && !node.HasActualStats && IsRowstoreScan(node) + && node.TableCardinality >= 100_000 && node.EstimateRows > 0 + && !string.IsNullOrEmpty(node.Predicate)) + { + var impact = BuildScanImpactDetails(node, stmt); + if (impact.CostPct >= 50) + { + var guessDesc = DetectCeGuess(node.EstimateRows, node.TableCardinality); + if (guessDesc != null) + { + node.Warnings.Add(new PlanWarning + { + WarningType = "Estimated Plan CE Guess", + Message = $"Estimated {node.EstimateRows:N0} rows from {node.TableCardinality:N0} row table — {guessDesc}. " + + $"The optimizer may be using a default guess instead of accurate statistics. " + + $"If actual selectivity is much lower, an index on the predicate columns could help significantly.", + Severity = PlanWarningSeverity.Warning + }); + } + } + } + // Rule 13: Mismatched data types (GetRangeWithMismatchedTypes / GetRangeThroughConvert) if (!cfg.IsRuleDisabled(13) && node.PhysicalOp == "Compute Scalar" && !string.IsNullOrEmpty(node.DefinedValues)) { @@ -1186,12 +1297,14 @@ private static bool IsScanOperator(PlanNode node) if (Regex.IsMatch(predicate, @"\b(isnull|coalesce)\s*\(", RegexOptions.IgnoreCase)) return "ISNULL/COALESCE wrapping column"; - // Common function calls on columns + // Common function calls on columns — but only if the function wraps a column, + // not a parameter/variable. Split on comparison operators to check which side + // the function is on. Predicate format: [db].[schema].[table].[col]>func(...) var funcMatch = FunctionInPredicateRegex.Match(predicate); if (funcMatch.Success) { var funcName = funcMatch.Groups[1].Value.ToUpperInvariant(); - if (funcName != "CONVERT_IMPLICIT") + if (funcName != "CONVERT_IMPLICIT" && IsFunctionOnColumnSide(predicate, funcMatch)) return $"Function call ({funcName}) on column"; } @@ -1202,6 +1315,36 @@ private static bool IsScanOperator(PlanNode node) return null; } + /// + /// Checks whether a function call in a predicate is on the column side of the comparison. + /// Predicate ScalarStrings look like: [db].[schema].[table].[col]>dateadd(day,(0),[@var]) + /// If the function is only on the parameter/literal side, it's still SARGable. + /// + private static bool IsFunctionOnColumnSide(string predicate, Match funcMatch) + { + // Find the comparison operator that splits the predicate into left/right sides. + // Operators in ScalarString: >=, <=, <>, >, <, = + var compMatch = Regex.Match(predicate, @"(?])([<>=!]{1,2})(?![<>=])"); + if (!compMatch.Success) + return true; // No comparison found — can't determine side, assume worst case + + var compPos = compMatch.Index; + var funcPos = funcMatch.Index; + + // Determine which side the function is on + var funcSide = funcPos < compPos ? "left" : "right"; + + // Check if that side also contains a column reference [...].[...].[...] + string side = funcSide == "left" + ? predicate[..compPos] + : predicate[(compPos + compMatch.Length)..]; + + // Column references are multi-part bracket-qualified: [schema].[table].[column] + // Variables are [@var] or [@var] — single bracket pair with @ prefix. + // Match [identifier].[identifier] (at least two dotted parts) to distinguish columns. + return Regex.IsMatch(side, @"\[[^\]@]+\]\.\["); + } + /// /// Detects CTEs that are referenced more than once in the statement text. /// Each reference re-executes the CTE since SQL Server does not materialize them. @@ -1438,6 +1581,27 @@ private static string QuantifyFilterImpact(PlanNode filterNode) return string.Join("\n", parts.Select(p => "• " + p)); } + /// + /// Detects well-known CE default selectivity guesses by comparing EstimateRows to TableCardinality. + /// Returns a description of the guess pattern, or null if no known pattern matches. + /// + private static string? DetectCeGuess(double estimateRows, double tableCardinality) + { + if (tableCardinality <= 0) return null; + var selectivity = estimateRows / tableCardinality; + + // Known CE guess selectivities with a 2% tolerance band + return selectivity switch + { + >= 0.29 and <= 0.31 => $"matches the 30% equality guess ({selectivity * 100:N1}%)", + >= 0.098 and <= 0.102 => $"matches the 10% inequality guess ({selectivity * 100:N1}%)", + >= 0.088 and <= 0.092 => $"matches the 9% LIKE/BETWEEN guess ({selectivity * 100:N1}%)", + >= 0.155 and <= 0.175 => $"matches the ~16.4% compound predicate guess ({selectivity * 100:N1}%)", + >= 0.009 and <= 0.011 => $"matches the 1% multi-inequality guess ({selectivity * 100:N1}%)", + _ => null + }; + } + private static long SumSubtreeReads(PlanNode node) { long reads = node.ActualLogicalReads; @@ -1447,6 +1611,52 @@ private static long SumSubtreeReads(PlanNode node) } /// + private record ScanImpact(double CostPct, double ElapsedPct, string? Summary); + + /// + /// Builds impact details for a scan node: what % of plan time/cost it represents, + /// and what fraction of rows survived filtering. + /// + private static ScanImpact BuildScanImpactDetails(PlanNode node, PlanStatement stmt) + { + var parts = new List(); + + // % of plan cost + double costPct = 0; + if (stmt.StatementSubTreeCost > 0 && node.EstimatedTotalSubtreeCost > 0) + { + costPct = node.EstimatedTotalSubtreeCost / stmt.StatementSubTreeCost * 100; + if (costPct >= 50) + parts.Add($"This scan is {costPct:N0}% of the plan cost."); + } + + // % of elapsed time (actual plans) + double elapsedPct = 0; + if (node.HasActualStats && node.ActualElapsedMs > 0 && + stmt.QueryTimeStats != null && stmt.QueryTimeStats.ElapsedTimeMs > 0) + { + elapsedPct = (double)node.ActualElapsedMs / stmt.QueryTimeStats.ElapsedTimeMs * 100; + if (elapsedPct >= 50) + parts.Add($"This scan took {elapsedPct:N0}% of elapsed time."); + } + + // Row selectivity: rows returned vs rows read (actual) or vs table cardinality (estimated) + if (node.HasActualStats && node.ActualRowsRead > 0 && node.ActualRows < node.ActualRowsRead) + { + var selectivity = (double)node.ActualRows / node.ActualRowsRead * 100; + if (selectivity < 10) + parts.Add($"Only {selectivity:N3}% of rows survived filtering ({node.ActualRows:N0} of {node.ActualRowsRead:N0})."); + } + else if (!node.HasActualStats && node.TableCardinality > 0 && node.EstimateRows < node.TableCardinality) + { + var selectivity = node.EstimateRows / node.TableCardinality * 100; + if (selectivity < 10) + parts.Add($"Only {selectivity:N1}% of rows estimated to survive filtering."); + } + + return new ScanImpact(costPct, elapsedPct, parts.Count > 0 ? string.Join(" ", parts) : null); + } + /// Determines whether a row estimate mismatch actually caused observable harm. /// Returns a description of the harm, or null if the bad estimate is benign. /// @@ -1460,6 +1670,57 @@ private static long SumSubtreeReads(PlanNode node) /// - A parent join may have chosen the wrong strategy based on bad row count /// - A parent Sort/Hash spilled (downstream estimate caused bad grant) /// + /// + /// Returns targeted advice based on statement-level wait stats, or null if no waits. + /// When the dominant wait type is clear, gives specific guidance instead of generic advice. + /// + private static string? GetWaitStatsAdvice(List waits) + { + if (waits.Count == 0) + return null; + + var totalMs = waits.Sum(w => w.WaitTimeMs); + if (totalMs == 0) + return null; + + var top = waits.OrderByDescending(w => w.WaitTimeMs).First(); + var topPct = (double)top.WaitTimeMs / totalMs * 100; + + // Only give targeted advice if the dominant wait is >= 80% of total wait time + if (topPct < 80) + return null; + + var waitType = top.WaitType.ToUpperInvariant(); + var advice = waitType switch + { + _ when waitType.StartsWith("PAGEIOLATCH") => + $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}. Data is being read from disk rather than memory. Consider adding indexes to reduce I/O, or investigate memory pressure.", + _ when waitType.StartsWith("LATCH_") => + $"Latch contention — {topPct:N0}% of wait time is {top.WaitType}.", + _ when waitType.StartsWith("LCK_") => + $"Lock contention — {topPct:N0}% of wait time is {top.WaitType}. Other sessions are holding locks that this query needs.", + _ when waitType.StartsWith("CXPACKET") || waitType.StartsWith("CXCONSUMER") => + $"Parallel thread skew — {topPct:N0}% of wait time is {top.WaitType}. Work is unevenly distributed across parallel threads.", + _ when waitType.Contains("IO_COMPLETION") => + $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}.", + _ when waitType.StartsWith("RESOURCE_SEMAPHORE") => + $"Memory grant wait — {topPct:N0}% of wait time is {top.WaitType}. The query had to wait for a memory grant.", + _ => $"Dominant wait is {top.WaitType} ({topPct:N0}% of wait time)." + }; + + return advice; + } + + private static bool AllocatesResources(PlanNode node) + { + // Operators that get memory grants or allocate structures based on row estimates. + // Hash Match (hash table), Sort (sort buffer), Spool (worktable). + var op = node.PhysicalOp; + return op.StartsWith("Hash", StringComparison.OrdinalIgnoreCase) + || op.StartsWith("Sort", StringComparison.OrdinalIgnoreCase) + || op.EndsWith("Spool", StringComparison.OrdinalIgnoreCase); + } + private static string? AssessEstimateHarm(PlanNode node, double ratio) { // Root node: no parent to harm. diff --git a/src/PlanViewer.Core/Services/ShowPlanParser.cs b/src/PlanViewer.Core/Services/ShowPlanParser.cs index 62ebf42..31c93d6 100644 --- a/src/PlanViewer.Core/Services/ShowPlanParser.cs +++ b/src/PlanViewer.Core/Services/ShowPlanParser.cs @@ -38,8 +38,9 @@ public static ParsedPlan Parse(string xml) foreach (var batchEl in batches) { var batch = new PlanBatch(); - var statementsEl = batchEl.Element(Ns + "Statements"); - if (statementsEl != null) + // A Batch can contain multiple elements (e.g., DECLARE + SELECT). + // Use Elements() to iterate all of them, not just the first. + foreach (var statementsEl in batchEl.Elements(Ns + "Statements")) { foreach (var stmtEl in statementsEl.Elements()) { @@ -205,7 +206,27 @@ private static List ParseStatementAndChildren(XElement stmtEl) } } - if (queryPlanEl == null) return stmt; + if (queryPlanEl == null) + { + // Statements with no QueryPlan (e.g., DECLARE/ASSIGN) still get a synthetic + // root node so they appear in the statement tab list. + var stmtType = stmt.StatementType.Length > 0 + ? stmt.StatementType.ToUpperInvariant() + : "STATEMENT"; + stmt.RootNode = new PlanNode + { + NodeId = -1, + PhysicalOp = stmtType, + LogicalOp = stmtType, + IconName = stmtType switch + { + "ASSIGN" => "assign", + "DECLARE" => "declare", + _ => "language_construct_catch_all" + } + }; + return stmt; + } ParseStmtAttributes(stmt, stmtEl); ParseQueryPlanElements(stmt, stmtEl, queryPlanEl);