Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/instructions/update_version.instructions.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
---
applyTo: '**'
---
After a code change, update the version
After a code change, update the version.
If updating in /docs, do not increment the version.

Example
Before Code Changes
Expand Down
2 changes: 1 addition & 1 deletion application/single_app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
EXECUTOR_TYPE = 'thread'
EXECUTOR_MAX_WORKERS = 30
SESSION_TYPE = 'filesystem'
VERSION = "0.241.004"
VERSION = "0.241.006"

SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production')

Expand Down
47 changes: 41 additions & 6 deletions application/single_app/foundry_agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ class FoundryAgentInvocationError(RuntimeError):
"""Raised when the Foundry agent invocation cannot be completed."""


def _normalize_max_completion_tokens(value: Any) -> Optional[int]:
try:
normalized = int(value)
except (TypeError, ValueError):
return None
return normalized if normalized > 0 else None


class AzureAIFoundryChatCompletionAgent:
"""Lightweight wrapper so Foundry agents behave like SK chat agents."""

Expand Down Expand Up @@ -107,6 +115,7 @@ def invoke(
global_settings=self._global_settings,
message_history=history,
metadata=metadata,
max_completion_tokens=self.max_completion_tokens,
)
)
except RuntimeError:
Expand Down Expand Up @@ -145,6 +154,7 @@ async def invoke_stream(
global_settings=self._global_settings,
message_history=list(messages),
metadata={},
max_completion_tokens=self.max_completion_tokens,
)
self.last_run_citations = result.citations
self.last_run_model = result.model
Expand Down Expand Up @@ -194,6 +204,7 @@ def invoke(
global_settings=self._global_settings,
message_history=history,
metadata=metadata,
max_completion_tokens=self.max_completion_tokens,
)
)
self.last_run_citations = result.citations
Expand All @@ -211,6 +222,7 @@ async def invoke_stream(
global_settings=self._global_settings,
message_history=list(messages),
metadata={},
max_completion_tokens=self.max_completion_tokens,
):
if stream_message.metadata:
citations = stream_message.metadata.get("citations")
Expand All @@ -228,6 +240,7 @@ async def execute_foundry_agent(
global_settings: Dict[str, Any],
message_history: List[ChatMessageContent],
metadata: Dict[str, Any],
max_completion_tokens: Optional[int] = None,
) -> FoundryAgentInvocationResult:
"""Invoke a Foundry agent using Semantic Kernel's AzureAIAgent abstraction."""

Expand All @@ -248,15 +261,20 @@ async def execute_foundry_agent(
endpoint=endpoint,
api_version=api_version,
)
resolved_max_completion_tokens = _normalize_max_completion_tokens(max_completion_tokens)

try:
definition = await client.agents.get_agent(agent_id)
azure_agent = AzureAIAgent(client=client, definition=definition)
responses = []
async for response in azure_agent.invoke(
messages=message_history,
metadata={k: str(v) for k, v in metadata.items() if v is not None},
):
invoke_kwargs = {
"messages": message_history,
"metadata": {k: str(v) for k, v in metadata.items() if v is not None},
}
if resolved_max_completion_tokens is not None:
invoke_kwargs["max_completion_tokens"] = resolved_max_completion_tokens

async for response in azure_agent.invoke(**invoke_kwargs):
responses.append(response)

if not responses:
Expand Down Expand Up @@ -299,6 +317,7 @@ async def execute_foundry_agent(
"endpoint": endpoint,
"model": model_value,
"message_length": len(text or ""),
"max_completion_tokens": resolved_max_completion_tokens,
},
)

Expand All @@ -321,6 +340,7 @@ async def execute_new_foundry_agent(
global_settings: Dict[str, Any],
message_history: List[ChatMessageContent],
metadata: Dict[str, Any],
max_completion_tokens: Optional[int] = None,
) -> FoundryAgentInvocationResult:
"""Invoke the new Foundry application runtime through its Responses protocol endpoint."""

Expand All @@ -343,7 +363,12 @@ async def execute_new_foundry_agent(
f"{endpoint.rstrip('/')}/applications/{quote(application_name, safe='')}/"
"protocols/openai/responses"
)
payload = _build_new_foundry_request_payload(message_history, metadata, stream=False)
payload = _build_new_foundry_request_payload(
message_history,
metadata,
stream=False,
max_output_tokens=_normalize_max_completion_tokens(max_completion_tokens),
)
headers = {
"Authorization": f"Bearer {token.token}",
"Content-Type": "application/json",
Expand Down Expand Up @@ -376,6 +401,7 @@ async def execute_new_foundry_agent(
"endpoint": endpoint,
"model": result.model,
"message_length": len(result.message),
"max_output_tokens": payload.get("max_output_tokens"),
},
)

Expand All @@ -390,6 +416,7 @@ async def execute_new_foundry_agent_stream(
global_settings: Dict[str, Any],
message_history: List[ChatMessageContent],
metadata: Dict[str, Any],
max_completion_tokens: Optional[int] = None,
) -> AsyncIterator[FoundryAgentStreamMessage]:
"""Stream a new Foundry application response through the Responses API."""

Expand All @@ -413,7 +440,12 @@ async def execute_new_foundry_agent_stream(
"protocols/openai/responses"
)
debug_print(f"Invoking new Foundry application '{application_name}' at {endpoint} with streaming to url {url} with api-version {responses_api_version}")
payload = _build_new_foundry_request_payload(message_history, metadata, stream=True)
payload = _build_new_foundry_request_payload(
message_history,
metadata,
stream=True,
max_output_tokens=_normalize_max_completion_tokens(max_completion_tokens),
)
headers = {
"Authorization": f"Bearer {token.token}",
"Content-Type": "application/json",
Expand Down Expand Up @@ -692,6 +724,7 @@ def _build_new_foundry_request_payload(
message_history: List[ChatMessageContent],
metadata: Dict[str, Any],
stream: bool = False,
max_output_tokens: Optional[int] = None,
) -> Dict[str, Any]:
input_items: List[Dict[str, Any]] = []
for message in message_history:
Expand Down Expand Up @@ -733,6 +766,8 @@ def _build_new_foundry_request_payload(
}
if normalized_metadata:
payload["metadata"] = normalized_metadata
if max_output_tokens is not None:
payload["max_output_tokens"] = max_output_tokens
return payload


Expand Down
2 changes: 1 addition & 1 deletion application/single_app/functions_global_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def ensure_default_global_agent_exists():
),
"actions_to_load": [],
"other_settings": {},
"max_completion_tokens": 4096
"max_completion_tokens": -1
}
save_global_agent(default_agent)
log_event(
Expand Down
136 changes: 115 additions & 21 deletions application/single_app/route_backend_chats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3890,6 +3890,15 @@ def is_tabular_access_limited_analysis(analysis_text):
'do not have direct access',
"don't have",
'do not have',
"doesn't include the full",
'does not include the full',
'only sample rows',
'only workbook metadata',
'only sample rows and workbook metadata',
'cannot accurately list all',
'cannot accurately list them',
'from the current evidence',
'from the evidence provided',
'visible excerpt you provided',
'if those tool-backed results exist',
'allow me to query again',
Expand All @@ -3898,6 +3907,80 @@ def is_tabular_access_limited_analysis(analysis_text):
return any(phrase in normalized_analysis for phrase in inaccessible_phrases)


def get_tabular_result_coverage_summary(invocations):
    """Return whether successful analytical tool calls produced full or partial result coverage.

    Args:
        invocations: Iterable of tool-invocation records (may be None or empty).

    Returns:
        dict with two independent booleans:
            'has_full_result_coverage': at least one call returned every
                matching row/value it reported.
            'has_partial_result_coverage': at least one call returned only a
                slice of the matches it reported.
    """
    coverage_summary = {
        'has_full_result_coverage': False,
        'has_partial_result_coverage': False,
    }

    for invocation in invocations or []:
        result_payload = get_tabular_invocation_result_payload(invocation) or {}

        # Row coverage: compare rows returned against total matches reported.
        # When the >= test fails both counts are known ints, so the converse
        # (returned < total) necessarily holds — a plain else suffices.
        total_matches = parse_tabular_result_count(result_payload.get('total_matches'))
        returned_rows = parse_tabular_result_count(result_payload.get('returned_rows'))
        if total_matches is not None and returned_rows is not None:
            if returned_rows >= total_matches:
                coverage_summary['has_full_result_coverage'] = True
            else:
                coverage_summary['has_partial_result_coverage'] = True

        # Distinct-value coverage: same comparison for value enumerations.
        distinct_count = parse_tabular_result_count(result_payload.get('distinct_count'))
        returned_values = parse_tabular_result_count(result_payload.get('returned_values'))
        if distinct_count is not None and returned_values is not None:
            if returned_values >= distinct_count:
                coverage_summary['has_full_result_coverage'] = True
            else:
                coverage_summary['has_partial_result_coverage'] = True

        # Explicit payload flags can also establish either kind of coverage.
        if result_payload.get('full_rows_included') or result_payload.get('full_values_included'):
            coverage_summary['has_full_result_coverage'] = True
        if result_payload.get('sample_rows_limited') or result_payload.get('values_limited'):
            coverage_summary['has_partial_result_coverage'] = True

        # Once both flags are set no later invocation can change the result.
        if (
            coverage_summary['has_full_result_coverage']
            and coverage_summary['has_partial_result_coverage']
        ):
            break

    return coverage_summary


def build_tabular_success_execution_gap_messages(user_question, analysis_text, invocations):
    """Return retry guidance when a successful tabular analysis still produced an incomplete answer."""
    coverage = get_tabular_result_coverage_summary(invocations)
    full_coverage = coverage['has_full_result_coverage']
    partial_coverage = coverage['has_partial_result_coverage']
    exhaustive_request = question_requests_tabular_exhaustive_results(user_question)

    guidance = []

    # Case 1: the analysis claimed the data was inaccessible even though
    # analytical tool calls succeeded — pick the most specific correction.
    if is_tabular_access_limited_analysis(analysis_text):
        if full_coverage:
            if exhaustive_request:
                guidance.append(
                    'Previous attempt still claimed only sample rows or workbook metadata were available even though successful analytical tool calls returned the full matching result set. Answer directly from those returned rows and list the full results the user asked for.'
                )
            else:
                guidance.append(
                    'Previous attempt still claimed the requested data was unavailable even though successful analytical tool calls returned the full matching result set. Use the returned rows and answer directly.'
                )
        else:
            guidance.append(
                'Previous attempt still claimed the requested data was unavailable even though analytical tool calls succeeded. Use the returned rows and answer directly.'
            )

    # Case 2: the user wanted an exhaustive list but only a partial slice
    # was ever returned (and no call achieved full coverage).
    if exhaustive_request and partial_coverage and not full_coverage:
        guidance.append(
            'The user asked for a full list, but previous analytical calls returned only a partial slice. Rerun the relevant analytical call with a higher max_rows or max_values before answering.'
        )

    return guidance


def _select_likely_workbook_sheet(sheet_names, question_text, per_sheet=None, score_match_fn=None):
"""Return a likely sheet name when the user question strongly matches one sheet."""
score_match_fn = score_match_fn or _score_tabular_sheet_match
Expand Down Expand Up @@ -4408,7 +4491,8 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None,
"12. Summarize concrete found records sheet-by-sheet using the tool results, not schema placeholders.\n"
"13. For count or percentage questions involving a cohort defined on one sheet and facts on another, prefer get_distinct_values, count_rows, filter_rows_by_related_values, or count_rows_by_related_values over manually counting sampled rows.\n"
"14. Use normalize_match=true when matching names, owners, assignees, engineers, or similar entity-text columns across worksheets.\n"
"15. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report."
"15. If a successful tool result reports returned_rows == total_matches or returned_values == distinct_count, treat that as the full matching result set. Do not claim that only sample rows or workbook metadata are available in that case.\n"
"16. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report."
)

return (
Expand Down Expand Up @@ -4461,8 +4545,9 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None,
"22. For identifier-based workbook questions, locate the identifier on the correct sheet before explaining downstream calculations.\n"
"23. For peak, busiest, highest, or lowest questions, use grouped functions and inspect the highest_group, highest_value, lowest_group, and lowest_value summary fields.\n"
"24. Return only computed findings and name the strongest drivers clearly.\n"
"25. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report.\n"
"26. When using query_tabular_data, use simple DataFrame.query() syntax with backticked column names for columns containing spaces. Avoid method calls such as .str.lower(), .astype(...), or other Python expressions that DataFrame.query() may reject."
"25. If a successful tool result reports returned_rows == total_matches or returned_values == distinct_count, treat that as the full matching result set. Do not claim that only sample rows or workbook metadata are available in that case.\n"
"26. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report.\n"
"27. When using query_tabular_data, use simple DataFrame.query() syntax with backticked column names for columns containing spaces. Avoid method calls such as .str.lower(), .astype(...), or other Python expressions that DataFrame.query() may reject."
)

baseline_invocations = plugin_logger.get_invocations_for_conversation(
Expand Down Expand Up @@ -4631,10 +4716,19 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None,
previous_tool_error_messages = []
previous_failed_call_parameters = []
previous_discovery_feedback_messages = []
execution_gap_messages = []
selected_sheets = []
coverage_summary = get_tabular_result_coverage_summary(
successful_analytical_invocations
)
retry_gap_messages = build_tabular_success_execution_gap_messages(
user_question,
analysis,
successful_analytical_invocations,
)

if entity_lookup_mode:
selected_sheets = get_tabular_invocation_selected_sheets(successful_analytical_invocations)
execution_gap_messages = []

# Cross-sheet results ("ALL (cross-sheet search)") already span
# the entire workbook — no execution gap for sheet coverage.
Expand All @@ -4648,24 +4742,24 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None,
f"Previous attempt only queried worksheet(s): {rendered_selected_sheets}. The question asks for related records across worksheets, so query additional relevant sheets explicitly with sheet_name."
)

if is_tabular_access_limited_analysis(analysis):
execution_gap_messages.append(
'Previous attempt still claimed the requested data was unavailable even though analytical tool calls succeeded. Use the returned rows and answer directly.'
)
execution_gap_messages.extend(retry_gap_messages)

if execution_gap_messages and attempt_number < 3:
previous_execution_gap_messages = execution_gap_messages
log_event(
f"[Tabular SK Analysis] Attempt {attempt_number} entity lookup was incomplete despite successful tool calls; retrying",
extra={
'selected_sheets': selected_sheets,
'execution_gaps': previous_execution_gap_messages,
'successful_tool_count': len(successful_analytical_invocations),
},
level=logging.WARNING,
)
baseline_invocation_count = len(invocations_after)
continue
if execution_gap_messages and attempt_number < 3:
previous_execution_gap_messages = execution_gap_messages
log_event(
f"[Tabular SK Analysis] Attempt {attempt_number} analysis was incomplete despite successful tool calls; retrying",
extra={
'selected_sheets': selected_sheets,
'execution_gaps': previous_execution_gap_messages,
'successful_tool_count': len(successful_analytical_invocations),
'has_full_result_coverage': coverage_summary.get('has_full_result_coverage', False),
'has_partial_result_coverage': coverage_summary.get('has_partial_result_coverage', False),
'entity_lookup_mode': entity_lookup_mode,
},
level=logging.WARNING,
)
baseline_invocation_count = len(invocations_after)
continue

previous_execution_gap_messages = []
log_event(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@
"type": "integer",
"minimum": -1,
"maximum": 512000,
"default": 4096
"default": -1
}
},
"required": [
Expand Down
3 changes: 0 additions & 3 deletions docs/_includes/latest_release_card.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ <h3 class="latest-release-card-title">

<div class="latest-release-card-actions">
<a class="btn btn-primary btn-sm" href="{{ feature.url | relative_url }}">{{ feature.cta_label | default: 'Read guide' }}</a>
{% if primary_image %}
<a class="btn btn-outline-secondary btn-sm" href="{{ primary_image | relative_url }}" target="_blank" rel="noopener">Open image</a>
{% endif %}
</div>
</div>
</article>
Loading
Loading