diff --git a/.github/instructions/update_version.instructions.md b/.github/instructions/update_version.instructions.md index db322826..6df1876e 100644 --- a/.github/instructions/update_version.instructions.md +++ b/.github/instructions/update_version.instructions.md @@ -1,7 +1,8 @@ --- applyTo: '**' --- -After a code change, update the version +After a code change, update the version. +If updating in /docs, do not increment the version. Example Before Code Changes diff --git a/application/single_app/config.py b/application/single_app/config.py index 0f46400c..7196cfe8 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -94,7 +94,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.241.004" +VERSION = "0.241.006" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/foundry_agent_runtime.py b/application/single_app/foundry_agent_runtime.py index 4de7f35a..0a88fb46 100644 --- a/application/single_app/foundry_agent_runtime.py +++ b/application/single_app/foundry_agent_runtime.py @@ -64,6 +64,14 @@ class FoundryAgentInvocationError(RuntimeError): """Raised when the Foundry agent invocation cannot be completed.""" +def _normalize_max_completion_tokens(value: Any) -> Optional[int]: + try: + normalized = int(value) + except (TypeError, ValueError): + return None + return normalized if normalized > 0 else None + + class AzureAIFoundryChatCompletionAgent: """Lightweight wrapper so Foundry agents behave like SK chat agents.""" @@ -107,6 +115,7 @@ def invoke( global_settings=self._global_settings, message_history=history, metadata=metadata, + max_completion_tokens=self.max_completion_tokens, ) ) except RuntimeError: @@ -145,6 +154,7 @@ async def invoke_stream( global_settings=self._global_settings, message_history=list(messages), metadata={}, + max_completion_tokens=self.max_completion_tokens, ) self.last_run_citations = result.citations 
self.last_run_model = result.model @@ -194,6 +204,7 @@ def invoke( global_settings=self._global_settings, message_history=history, metadata=metadata, + max_completion_tokens=self.max_completion_tokens, ) ) self.last_run_citations = result.citations @@ -211,6 +222,7 @@ async def invoke_stream( global_settings=self._global_settings, message_history=list(messages), metadata={}, + max_completion_tokens=self.max_completion_tokens, ): if stream_message.metadata: citations = stream_message.metadata.get("citations") @@ -228,6 +240,7 @@ async def execute_foundry_agent( global_settings: Dict[str, Any], message_history: List[ChatMessageContent], metadata: Dict[str, Any], + max_completion_tokens: Optional[int] = None, ) -> FoundryAgentInvocationResult: """Invoke a Foundry agent using Semantic Kernel's AzureAIAgent abstraction.""" @@ -248,15 +261,20 @@ async def execute_foundry_agent( endpoint=endpoint, api_version=api_version, ) + resolved_max_completion_tokens = _normalize_max_completion_tokens(max_completion_tokens) try: definition = await client.agents.get_agent(agent_id) azure_agent = AzureAIAgent(client=client, definition=definition) responses = [] - async for response in azure_agent.invoke( - messages=message_history, - metadata={k: str(v) for k, v in metadata.items() if v is not None}, - ): + invoke_kwargs = { + "messages": message_history, + "metadata": {k: str(v) for k, v in metadata.items() if v is not None}, + } + if resolved_max_completion_tokens is not None: + invoke_kwargs["max_completion_tokens"] = resolved_max_completion_tokens + + async for response in azure_agent.invoke(**invoke_kwargs): responses.append(response) if not responses: @@ -299,6 +317,7 @@ async def execute_foundry_agent( "endpoint": endpoint, "model": model_value, "message_length": len(text or ""), + "max_completion_tokens": resolved_max_completion_tokens, }, ) @@ -321,6 +340,7 @@ async def execute_new_foundry_agent( global_settings: Dict[str, Any], message_history: List[ChatMessageContent], 
metadata: Dict[str, Any], + max_completion_tokens: Optional[int] = None, ) -> FoundryAgentInvocationResult: """Invoke the new Foundry application runtime through its Responses protocol endpoint.""" @@ -343,7 +363,12 @@ async def execute_new_foundry_agent( f"{endpoint.rstrip('/')}/applications/{quote(application_name, safe='')}/" "protocols/openai/responses" ) - payload = _build_new_foundry_request_payload(message_history, metadata, stream=False) + payload = _build_new_foundry_request_payload( + message_history, + metadata, + stream=False, + max_output_tokens=_normalize_max_completion_tokens(max_completion_tokens), + ) headers = { "Authorization": f"Bearer {token.token}", "Content-Type": "application/json", @@ -376,6 +401,7 @@ async def execute_new_foundry_agent( "endpoint": endpoint, "model": result.model, "message_length": len(result.message), + "max_output_tokens": payload.get("max_output_tokens"), }, ) @@ -390,6 +416,7 @@ async def execute_new_foundry_agent_stream( global_settings: Dict[str, Any], message_history: List[ChatMessageContent], metadata: Dict[str, Any], + max_completion_tokens: Optional[int] = None, ) -> AsyncIterator[FoundryAgentStreamMessage]: """Stream a new Foundry application response through the Responses API.""" @@ -413,7 +440,12 @@ async def execute_new_foundry_agent_stream( "protocols/openai/responses" ) debug_print(f"Invoking new Foundry application '{application_name}' at {endpoint} with streaming to url {url} with api-version {responses_api_version}") - payload = _build_new_foundry_request_payload(message_history, metadata, stream=True) + payload = _build_new_foundry_request_payload( + message_history, + metadata, + stream=True, + max_output_tokens=_normalize_max_completion_tokens(max_completion_tokens), + ) headers = { "Authorization": f"Bearer {token.token}", "Content-Type": "application/json", @@ -692,6 +724,7 @@ def _build_new_foundry_request_payload( message_history: List[ChatMessageContent], metadata: Dict[str, Any], stream: bool = 
False, + max_output_tokens: Optional[int] = None, ) -> Dict[str, Any]: input_items: List[Dict[str, Any]] = [] for message in message_history: @@ -733,6 +766,8 @@ def _build_new_foundry_request_payload( } if normalized_metadata: payload["metadata"] = normalized_metadata + if max_output_tokens is not None: + payload["max_output_tokens"] = max_output_tokens return payload diff --git a/application/single_app/functions_global_agents.py b/application/single_app/functions_global_agents.py index 7fecf1ee..51870b9c 100644 --- a/application/single_app/functions_global_agents.py +++ b/application/single_app/functions_global_agents.py @@ -51,7 +51,7 @@ def ensure_default_global_agent_exists(): ), "actions_to_load": [], "other_settings": {}, - "max_completion_tokens": 4096 + "max_completion_tokens": -1 } save_global_agent(default_agent) log_event( diff --git a/application/single_app/route_backend_chats.py b/application/single_app/route_backend_chats.py index c6e99a62..005fb67d 100644 --- a/application/single_app/route_backend_chats.py +++ b/application/single_app/route_backend_chats.py @@ -3890,6 +3890,15 @@ def is_tabular_access_limited_analysis(analysis_text): 'do not have direct access', "don't have", 'do not have', + "doesn't include the full", + 'does not include the full', + 'only sample rows', + 'only workbook metadata', + 'only sample rows and workbook metadata', + 'cannot accurately list all', + 'cannot accurately list them', + 'from the current evidence', + 'from the evidence provided', 'visible excerpt you provided', 'if those tool-backed results exist', 'allow me to query again', @@ -3898,6 +3907,80 @@ def is_tabular_access_limited_analysis(analysis_text): return any(phrase in normalized_analysis for phrase in inaccessible_phrases) +def get_tabular_result_coverage_summary(invocations): + """Return whether successful analytical tool calls produced full or partial result coverage.""" + coverage_summary = { + 'has_full_result_coverage': False, + 
'has_partial_result_coverage': False, + } + + for invocation in invocations or []: + result_payload = get_tabular_invocation_result_payload(invocation) or {} + + total_matches = parse_tabular_result_count(result_payload.get('total_matches')) + returned_rows = parse_tabular_result_count(result_payload.get('returned_rows')) + if total_matches is not None and returned_rows is not None: + if returned_rows >= total_matches: + coverage_summary['has_full_result_coverage'] = True + elif returned_rows < total_matches: + coverage_summary['has_partial_result_coverage'] = True + + distinct_count = parse_tabular_result_count(result_payload.get('distinct_count')) + returned_values = parse_tabular_result_count(result_payload.get('returned_values')) + if distinct_count is not None and returned_values is not None: + if returned_values >= distinct_count: + coverage_summary['has_full_result_coverage'] = True + elif returned_values < distinct_count: + coverage_summary['has_partial_result_coverage'] = True + + if result_payload.get('full_rows_included') or result_payload.get('full_values_included'): + coverage_summary['has_full_result_coverage'] = True + if result_payload.get('sample_rows_limited') or result_payload.get('values_limited'): + coverage_summary['has_partial_result_coverage'] = True + + if ( + coverage_summary['has_full_result_coverage'] + and coverage_summary['has_partial_result_coverage'] + ): + break + + return coverage_summary + + +def build_tabular_success_execution_gap_messages(user_question, analysis_text, invocations): + """Return retry guidance when a successful tabular analysis still produced an incomplete answer.""" + coverage_summary = get_tabular_result_coverage_summary(invocations) + has_full_result_coverage = coverage_summary['has_full_result_coverage'] + has_partial_result_coverage = coverage_summary['has_partial_result_coverage'] + wants_exhaustive_results = question_requests_tabular_exhaustive_results(user_question) + execution_gap_messages = [] + + if 
is_tabular_access_limited_analysis(analysis_text): + if wants_exhaustive_results and has_full_result_coverage: + execution_gap_messages.append( + 'Previous attempt still claimed only sample rows or workbook metadata were available even though successful analytical tool calls returned the full matching result set. Answer directly from those returned rows and list the full results the user asked for.' + ) + elif has_full_result_coverage: + execution_gap_messages.append( + 'Previous attempt still claimed the requested data was unavailable even though successful analytical tool calls returned the full matching result set. Use the returned rows and answer directly.' + ) + else: + execution_gap_messages.append( + 'Previous attempt still claimed the requested data was unavailable even though analytical tool calls succeeded. Use the returned rows and answer directly.' + ) + + if ( + wants_exhaustive_results + and has_partial_result_coverage + and not has_full_result_coverage + ): + execution_gap_messages.append( + 'The user asked for a full list, but previous analytical calls returned only a partial slice. Rerun the relevant analytical call with a higher max_rows or max_values before answering.' + ) + + return execution_gap_messages + + def _select_likely_workbook_sheet(sheet_names, question_text, per_sheet=None, score_match_fn=None): """Return a likely sheet name when the user question strongly matches one sheet.""" score_match_fn = score_match_fn or _score_tabular_sheet_match @@ -4408,7 +4491,8 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None, "12. Summarize concrete found records sheet-by-sheet using the tool results, not schema placeholders.\n" "13. For count or percentage questions involving a cohort defined on one sheet and facts on another, prefer get_distinct_values, count_rows, filter_rows_by_related_values, or count_rows_by_related_values over manually counting sampled rows.\n" "14. 
Use normalize_match=true when matching names, owners, assignees, engineers, or similar entity-text columns across worksheets.\n" - "15. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report." + "15. If a successful tool result reports returned_rows == total_matches or returned_values == distinct_count, treat that as the full matching result set. Do not claim that only sample rows or workbook metadata are available in that case.\n" + "16. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report." ) return ( @@ -4461,8 +4545,9 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None, "22. For identifier-based workbook questions, locate the identifier on the correct sheet before explaining downstream calculations.\n" "23. For peak, busiest, highest, or lowest questions, use grouped functions and inspect the highest_group, highest_value, lowest_group, and lowest_value summary fields.\n" "24. Return only computed findings and name the strongest drivers clearly.\n" - "25. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report.\n" - "26. When using query_tabular_data, use simple DataFrame.query() syntax with backticked column names for columns containing spaces. Avoid method calls such as .str.lower(), .astype(...), or other Python expressions that DataFrame.query() may reject." + "25. If a successful tool result reports returned_rows == total_matches or returned_values == distinct_count, treat that as the full matching result set. Do not claim that only sample rows or workbook metadata are available in that case.\n" + "26. 
Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report.\n" + "27. When using query_tabular_data, use simple DataFrame.query() syntax with backticked column names for columns containing spaces. Avoid method calls such as .str.lower(), .astype(...), or other Python expressions that DataFrame.query() may reject." ) baseline_invocations = plugin_logger.get_invocations_for_conversation( @@ -4631,10 +4716,19 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None, previous_tool_error_messages = [] previous_failed_call_parameters = [] previous_discovery_feedback_messages = [] + execution_gap_messages = [] + selected_sheets = [] + coverage_summary = get_tabular_result_coverage_summary( + successful_analytical_invocations + ) + retry_gap_messages = build_tabular_success_execution_gap_messages( + user_question, + analysis, + successful_analytical_invocations, + ) if entity_lookup_mode: selected_sheets = get_tabular_invocation_selected_sheets(successful_analytical_invocations) - execution_gap_messages = [] # Cross-sheet results ("ALL (cross-sheet search)") already span # the entire workbook โ€” no execution gap for sheet coverage. @@ -4648,24 +4742,24 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None, f"Previous attempt only queried worksheet(s): {rendered_selected_sheets}. The question asks for related records across worksheets, so query additional relevant sheets explicitly with sheet_name." ) - if is_tabular_access_limited_analysis(analysis): - execution_gap_messages.append( - 'Previous attempt still claimed the requested data was unavailable even though analytical tool calls succeeded. Use the returned rows and answer directly.' 
- ) + execution_gap_messages.extend(retry_gap_messages) - if execution_gap_messages and attempt_number < 3: - previous_execution_gap_messages = execution_gap_messages - log_event( - f"[Tabular SK Analysis] Attempt {attempt_number} entity lookup was incomplete despite successful tool calls; retrying", - extra={ - 'selected_sheets': selected_sheets, - 'execution_gaps': previous_execution_gap_messages, - 'successful_tool_count': len(successful_analytical_invocations), - }, - level=logging.WARNING, - ) - baseline_invocation_count = len(invocations_after) - continue + if execution_gap_messages and attempt_number < 3: + previous_execution_gap_messages = execution_gap_messages + log_event( + f"[Tabular SK Analysis] Attempt {attempt_number} analysis was incomplete despite successful tool calls; retrying", + extra={ + 'selected_sheets': selected_sheets, + 'execution_gaps': previous_execution_gap_messages, + 'successful_tool_count': len(successful_analytical_invocations), + 'has_full_result_coverage': coverage_summary.get('has_full_result_coverage', False), + 'has_partial_result_coverage': coverage_summary.get('has_partial_result_coverage', False), + 'entity_lookup_mode': entity_lookup_mode, + }, + level=logging.WARNING, + ) + baseline_invocation_count = len(invocations_after) + continue previous_execution_gap_messages = [] log_event( diff --git a/application/single_app/static/json/schemas/agent.schema.json b/application/single_app/static/json/schemas/agent.schema.json index 64f91251..11f17de0 100644 --- a/application/single_app/static/json/schemas/agent.schema.json +++ b/application/single_app/static/json/schemas/agent.schema.json @@ -110,7 +110,7 @@ "type": "integer", "minimum": -1, "maximum": 512000, - "default": 4096 + "default": -1 } }, "required": [ diff --git a/docs/_includes/latest_release_card.html b/docs/_includes/latest_release_card.html index d073f69e..0a2a35bb 100644 --- a/docs/_includes/latest_release_card.html +++ b/docs/_includes/latest_release_card.html @@ 
-39,9 +39,6 @@

{{ feature.cta_label | default: 'Read guide' }} - {% if primary_image %} - Open image - {% endif %}
\ No newline at end of file diff --git a/docs/_layouts/latest-release-feature.html b/docs/_layouts/latest-release-feature.html index c6615dff..38a7b88a 100644 --- a/docs/_layouts/latest-release-feature.html +++ b/docs/_layouts/latest-release-feature.html @@ -2,15 +2,9 @@ layout: default --- -{% assign feature_meta = site.data.latest_release_features.lookup[page.slug] %} +{% assign feature_key = page.name | split: '.' | first %} +{% assign feature_meta = site.data.latest_release_features.lookup[feature_key] %} {% assign accent = feature_meta.accent | default: 'blue' %} -{% assign primary_image = feature_meta.image %} -{% assign primary_image_alt = feature_meta.image_alt | default: page.title %} - -{% if primary_image == nil and feature_meta.images and feature_meta.images.size > 0 %} - {% assign primary_image = feature_meta.images[0].path %} - {% assign primary_image_alt = feature_meta.images[0].alt | default: page.title %} -{% endif %}
@@ -31,22 +25,13 @@

{{ page.title }}

Back to all highlights - {% if primary_image %} - Open screenshot - {% endif %}
- {% if primary_image %} - - {{ primary_image_alt }} - - {% else %} - - {% endif %} +
@@ -61,17 +46,22 @@

Screenshots

{{ feature_meta.images.size }} image{% if feature_meta.images.size != 1 %}s{% endif %} - -
\ No newline at end of file + + {% if feature_meta.images and feature_meta.images.size > 0 %} + + {% endif %} + + +{% if feature_meta.images and feature_meta.images.size > 0 %} + +{% endif %} diff --git a/docs/_layouts/latest-release-index.html b/docs/_layouts/latest-release-index.html index 8a8032e6..a9312aaa 100644 --- a/docs/_layouts/latest-release-index.html +++ b/docs/_layouts/latest-release-index.html @@ -2,6 +2,8 @@ layout: default --- +{% assign feature_data = site.data.latest_release_features %} +
@@ -24,5 +26,75 @@

{{ page.title }}

{{ content }} + +
+
+
+
Current release
+

{{ feature_data.current_release.label }}

+

{{ feature_data.current_release.description }}

+
+ {{ feature_data.current_release.badge }} +
+ +
+ {% for slug in feature_data.current_release.slugs %} + {% assign feature = feature_data.lookup[slug] %} + {% include latest_release_card.html feature=feature badge=feature_data.current_release.badge %} + {% endfor %} +
+
+ + {% for group in feature_data.previous_release_groups %} +
+ + + Archive + {{ group.label }} + {{ group.description }} + + + + v{{ group.release_version }} + Show highlights + + + +
+
+ {% for slug in group.slugs %} + {% assign feature = feature_data.lookup[slug] %} + {% include latest_release_card.html feature=feature badge=group.badge %} + {% endfor %} +
+ + {% if group.highlights %} +
+

Additional highlights from v{{ group.release_version }}

+
    + {% for item in group.highlights %} +
  • {{ item }}
  • + {% endfor %} +
+
+ {% endif %} + + {% if group.bug_fixes %} +
+

Bug fixes kept for reference

+
    + {% for item in group.bug_fixes %} +
  • {{ item }}
  • + {% endfor %} +
+
+ {% endif %} +
+
+ {% endfor %} + +
\ No newline at end of file diff --git a/docs/assets/css/main.scss b/docs/assets/css/main.scss index e77bd72c..740f311c 100644 --- a/docs/assets/css/main.scss +++ b/docs/assets/css/main.scss @@ -506,13 +506,11 @@ pre[class*="language-"] { width: min(36vw, 180px); } -.latest-release-hero-image, .latest-release-card-image { display: block; text-decoration: none; } -.latest-release-hero-image img, .latest-release-card-image img, .latest-release-rich-content img { border: 1px solid rgba(15, 23, 42, 0.1); @@ -522,11 +520,6 @@ pre[class*="language-"] { max-width: 100%; } -.latest-release-hero-image img { - max-height: 360px; - object-fit: cover; -} - .latest-release-index-content, .latest-release-feature-content { margin-top: 1.75rem; @@ -536,6 +529,69 @@ pre[class*="language-"] { margin-top: 1.75rem; } +.latest-release-thumbnail-trigger { + appearance: none; + background: transparent; + border: 0; + cursor: pointer; + display: block; + max-width: 100%; + padding: 0; + text-align: left; +} + +.latest-release-thumbnail-trigger:focus-visible { + outline: 2px solid #0d6efd; + outline-offset: 0.35rem; + border-radius: 1rem; +} + +.latest-release-thumbnail-card { + display: block; +} + +.latest-release-thumbnail-gallery { + display: flex; + flex-wrap: wrap; + gap: 1rem; + max-width: 100%; +} + +.latest-release-thumbnail-gallery .latest-release-thumbnail-trigger { + flex: 0 1 240px; +} + +.latest-release-thumbnail-media { + background: linear-gradient(135deg, #f8f9fa, #eef2f7); + border: 1px solid rgba(0, 0, 0, 0.06); + border-radius: 0.85rem; + box-shadow: 0 0.7rem 1.6rem rgba(15, 23, 42, 0.08); + display: block; + max-width: 100%; + object-fit: cover; + transition: transform 0.2s ease, box-shadow 0.2s ease; + width: 240px; +} + +.latest-release-thumbnail-trigger:hover .latest-release-thumbnail-media, +.latest-release-thumbnail-trigger:focus-visible .latest-release-thumbnail-media { + transform: translateY(-2px); + box-shadow: 0 0.9rem 1.8rem rgba(15, 23, 42, 0.14); +} + 
+.latest-release-thumbnail-meta { + color: #6c757d; + display: block; + font-size: 0.9rem; + margin-top: 0.55rem; +} + +.latest-release-thumbnail-title { + display: block; + font-weight: 600; + margin-top: 0.55rem; +} + .latest-release-section { margin-top: 1.5rem; } @@ -813,52 +869,54 @@ pre[class*="language-"] { text-underline-offset: 0.15rem; } -.latest-release-gallery-grid { - display: grid; - gap: 1rem; - grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); +.latest-feature-image-modal .modal-dialog { + max-width: min(1100px, calc(100vw - 2rem)); } -.latest-release-gallery-grid--single { - grid-template-columns: minmax(0, 1fr); -} - -.latest-release-gallery-card { - background: rgba(255, 255, 255, 0.98); - border: 1px solid var(--bs-border-color); +.latest-feature-image-modal .modal-content { + background: linear-gradient(180deg, rgba(25, 33, 52, 0.98), rgba(14, 20, 34, 0.98)); + border: 1px solid rgba(255, 255, 255, 0.12); border-radius: 1rem; - box-shadow: 0 0.8rem 1.8rem rgba(15, 23, 42, 0.05); + box-shadow: 0 1.4rem 3rem rgba(0, 0, 0, 0.38); + color: #f8f9fa; overflow: hidden; } -.latest-release-gallery-link { - display: block; +.latest-feature-image-modal .modal-header { + background: rgba(255, 255, 255, 0.03); + border-color: rgba(248, 249, 250, 0.12); } -.latest-release-gallery-link img { - aspect-ratio: 16 / 10; - border: 0; - border-bottom: 1px solid var(--bs-border-color); - border-radius: 0; - box-shadow: none; - object-fit: cover; - width: 100%; +.latest-feature-image-modal .modal-body { + background: radial-gradient(circle at top, rgba(96, 165, 250, 0.08), transparent 45%), rgba(12, 18, 30, 0.92); + padding: 1rem 1.2rem 1.2rem; } -.latest-release-gallery-caption { - display: flex; - flex-direction: column; - gap: 0.35rem; - padding: 1rem 1.05rem 1.05rem; +.latest-feature-image-modal-caption { + color: rgba(248, 250, 252, 0.9); + font-size: 0.98rem; } -.latest-release-gallery-caption strong { - font-size: 1rem; +.latest-feature-image-modal 
.btn-close { + filter: invert(1) grayscale(100%) brightness(200%); } -.latest-release-gallery-caption span { - color: var(--simplechat-secondary); - line-height: 1.6; +.latest-feature-image-frame { + background: linear-gradient(180deg, rgba(30, 41, 59, 0.9), rgba(15, 23, 42, 0.88)); + border: 1px solid rgba(148, 163, 184, 0.24); + border-radius: 1rem; + box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.05); + padding: 0.9rem; +} + +.latest-feature-image-modal img { + background: rgba(248, 250, 252, 0.98); + border-radius: 0.9rem; + box-shadow: 0 0.8rem 2rem rgba(0, 0, 0, 0.24); + display: block; + max-height: 75vh; + object-fit: contain; + width: 100%; } [data-bs-theme="dark"] .latest-release-hero { @@ -875,7 +933,6 @@ pre[class*="language-"] { [data-bs-theme="dark"] .latest-release-card-shell, [data-bs-theme="dark"] .latest-release-card-icon, [data-bs-theme="dark"] .latest-release-archive-panel, -[data-bs-theme="dark"] .latest-release-gallery-card, [data-bs-theme="dark"] .latest-release-rich-content > h2 + p, [data-bs-theme="dark"] .latest-release-rich-content > h2 + ul, [data-bs-theme="dark"] .latest-release-rich-content > h2 + ol { @@ -900,6 +957,7 @@ pre[class*="language-"] { [data-bs-theme="dark"] .latest-release-footer-note, [data-bs-theme="dark"] .latest-release-note-panel ul, [data-bs-theme="dark"] .latest-release-note-panel p, +[data-bs-theme="dark"] .latest-release-thumbnail-meta, [data-bs-theme="dark"] .latest-release-breadcrumb a { color: rgba(226, 232, 240, 0.82); } @@ -911,15 +969,30 @@ pre[class*="language-"] { border-color: rgba(var(--latest-release-accent-rgb), 0.28); } -[data-bs-theme="dark"] .latest-release-hero-image img, [data-bs-theme="dark"] .latest-release-card-image img, -[data-bs-theme="dark"] .latest-release-gallery-link img, [data-bs-theme="dark"] .latest-release-rich-content img { border-color: rgba(148, 163, 184, 0.2); } -[data-bs-theme="dark"] .latest-release-gallery-caption span { - color: rgba(226, 232, 240, 0.8); 
+[data-bs-theme="dark"] .latest-release-thumbnail-media { + background: linear-gradient(135deg, rgba(15, 23, 42, 0.92), rgba(30, 41, 59, 0.9)); + border-color: rgba(96, 165, 250, 0.2); +} + +[data-bs-theme="dark"] .latest-feature-image-modal .modal-content { + background: linear-gradient(180deg, rgba(26, 32, 44, 0.99), rgba(10, 14, 24, 0.99)); + border-color: rgba(148, 163, 184, 0.22); + box-shadow: 0 1.5rem 3.2rem rgba(0, 0, 0, 0.55); +} + +[data-bs-theme="dark"] .latest-feature-image-modal .modal-body { + background: radial-gradient(circle at top, rgba(59, 130, 246, 0.12), transparent 42%), rgba(8, 12, 20, 0.96); +} + +[data-bs-theme="dark"] .latest-feature-image-frame { + background: linear-gradient(180deg, rgba(17, 24, 39, 0.96), rgba(30, 41, 59, 0.92)); + border-color: rgba(96, 165, 250, 0.18); + box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.04), 0 0.75rem 1.8rem rgba(0, 0, 0, 0.28); } @media (max-width: 991.98px) { diff --git a/docs/assets/js/latest-release.js b/docs/assets/js/latest-release.js new file mode 100644 index 00000000..7923c4f2 --- /dev/null +++ b/docs/assets/js/latest-release.js @@ -0,0 +1,43 @@ +// latest-release.js + +function setupLatestFeatureImageModal() { + const modalElement = document.getElementById('latestFeatureImageModal'); + const modalImage = document.getElementById('latestFeatureImageModalImage'); + const modalTitle = document.getElementById('latestFeatureImageModalLabel'); + const modalCaption = document.getElementById('latestFeatureImageModalCaption'); + const imageTriggers = document.querySelectorAll('[data-latest-feature-image-src]'); + + if (!modalElement || !modalImage || !modalTitle || !modalCaption || imageTriggers.length === 0) { + return; + } + + const imageModal = bootstrap.Modal.getOrCreateInstance(modalElement); + + imageTriggers.forEach((trigger) => { + trigger.addEventListener('click', () => { + const imageSrc = trigger.dataset.latestFeatureImageSrc; + const imageTitle = trigger.dataset.latestFeatureImageTitle || 
'Latest Feature Preview'; + const imageCaption = trigger.dataset.latestFeatureImageCaption || 'Click outside the popup to close it.'; + const imageAlt = trigger.querySelector('img')?.getAttribute('alt') || imageTitle; + + if (!imageSrc) { + return; + } + + modalImage.src = imageSrc; + modalImage.alt = imageAlt; + modalTitle.textContent = imageTitle; + modalCaption.textContent = imageCaption; + imageModal.show(); + }); + }); + + modalElement.addEventListener('hidden.bs.modal', () => { + modalImage.src = ''; + modalImage.alt = 'Latest feature preview'; + }); +} + +document.addEventListener('DOMContentLoaded', () => { + setupLatestFeatureImageModal(); +}); \ No newline at end of file diff --git a/docs/explanation/fixes/TABULAR_EXHAUSTIVE_RESULT_SYNTHESIS_FIX.md b/docs/explanation/fixes/TABULAR_EXHAUSTIVE_RESULT_SYNTHESIS_FIX.md new file mode 100644 index 00000000..347a8cf6 --- /dev/null +++ b/docs/explanation/fixes/TABULAR_EXHAUSTIVE_RESULT_SYNTHESIS_FIX.md @@ -0,0 +1,42 @@ +# Tabular Exhaustive Result Synthesis Fix + +Fixed/Implemented in version: **0.241.006** + +## Issue Description + +For exhaustive tabular questions such as "list out all of the security controls," the tabular analysis workflow could successfully execute an analytical tool call that returned the full matching result set, but the inner synthesis step could still answer as though it only had workbook schema samples. + +## Root Cause Analysis + +The main tabular retry guardrails in [route_backend_chats.py](application/single_app/route_backend_chats.py) only treated this kind of bad synthesis as retry-worthy in entity-lookup mode. General analytical requests could therefore accept a response that claimed only sample rows or workbook metadata were available even after a successful `query_tabular_data` call had returned the full result set. 
+ +## Technical Details + +### Files Modified + +- `application/single_app/route_backend_chats.py` +- `functional_tests/test_tabular_exhaustive_result_synthesis_fix.py` +- `application/single_app/config.py` + +### Code Changes Summary + +- Expanded the access-limited synthesis detector to catch responses that say the data only includes sample rows, workbook metadata, or not the full list. +- Added result-coverage helpers that distinguish between full and partial analytical result slices. +- Reused those coverage signals in the primary tabular analysis loop so successful analytical calls can trigger a retry for general analysis mode, not just entity lookup. +- Added prompt guidance telling the tabular synthesis model to treat `returned_rows == total_matches` and `returned_values == distinct_count` as full result availability. + +### Testing Approach + +- Added a regression test covering full-result exhaustive list retries. +- Added a regression test covering partial-result exhaustive list reruns. + +## Validation + +### Expected Improvement + +- Exhaustive list questions no longer stop at a synthesis response that wrongly claims only schema samples are available after successful analytical tool calls. +- When only a partial slice is returned, the workflow now has explicit retry guidance to rerun the relevant analytical call with a higher limit before answering. + +### Related Version Update + +- `application/single_app/config.py` updated to `0.241.006`. \ No newline at end of file diff --git a/docs/explanation/release_notes.md b/docs/explanation/release_notes.md index 1092c4c8..a4ddd01f 100644 --- a/docs/explanation/release_notes.md +++ b/docs/explanation/release_notes.md @@ -4,6 +4,20 @@ This page tracks notable Simple Chat releases and organizes the detailed change For feature-focused and fix-focused drill-downs by version, see [Features by Version](/explanation/features/) and [Fixes by Version](/explanation/fixes/). 
+### **(v0.241.006)** + +#### Bug Fixes + +* **Agent Output Token Defaults and Foundry Limit Enforcement** + * Fixed stale agent output-token defaults so new and normalized agents now use `-1` to defer to the provider or model default instead of silently reintroducing older fixed caps. + * Azure AI Foundry agent execution now also honors saved output-token settings in both classic Foundry agent runs and new Foundry Responses-based runs, so configured limits are enforced consistently instead of only being stored in agent configuration. + * (Ref: `functions_global_agents.py`, `agent.schema.json`, `foundry_agent_runtime.py`, `test_foundry_token_limit_defaults.py`) + +* **Tabular Exhaustive Result Synthesis Retry** + * Fixed exhaustive tabular questions such as "list all" requests so the workflow no longer stops at an answer that claims only sample rows or workbook metadata are available after analytical tool calls already returned the full matching result set. + * General tabular analysis now detects full versus partial result coverage from tool metadata, retries incomplete synthesis when necessary, and adds stronger prompt guidance so the final answer uses the returned analytical results directly. + * (Ref: `route_backend_chats.py`, `test_tabular_exhaustive_result_synthesis_fix.py`, `TABULAR_EXHAUSTIVE_RESULT_SYNTHESIS_FIX.md`) + ### **(v0.241.002)** #### Bug Fixes diff --git a/docs/latest-release/index.md b/docs/latest-release/index.md index a6b1f253..0f55a08a 100644 --- a/docs/latest-release/index.md +++ b/docs/latest-release/index.md @@ -5,74 +5,4 @@ description: "Current feature guides with previous release highlights kept for r section: "Latest Release" --- -{% assign feature_data = site.data.latest_release_features %} - -
-
-
-
Current release
-

{{ feature_data.current_release.label }}

-

{{ feature_data.current_release.description }}

-
- {{ feature_data.current_release.badge }} -
- -
- {% for slug in feature_data.current_release.slugs %} - {% assign feature = feature_data.lookup[slug] %} - {% include latest_release_card.html feature=feature badge=feature_data.current_release.badge %} - {% endfor %} -
-
- -{% for group in feature_data.previous_release_groups %} -
- - - Archive - {{ group.label }} - {{ group.description }} - - - - v{{ group.release_version }} - Show highlights - - - -
-
- {% for slug in group.slugs %} - {% assign feature = feature_data.lookup[slug] %} - {% include latest_release_card.html feature=feature badge=group.badge %} - {% endfor %} -
- - {% if group.highlights %} -
-

Additional highlights from v{{ group.release_version }}

-
    - {% for item in group.highlights %} -
  • {{ item }}
  • - {% endfor %} -
-
- {% endif %} - - {% if group.bug_fixes %} -
-

Bug fixes kept for reference

-
    - {% for item in group.bug_fixes %} -
  • {{ item }}
  • - {% endfor %} -
-
- {% endif %} -
-
-{% endfor %} - - +This page mirrors the curated in-app Latest Features experience and keeps earlier release highlights available in an archive section below the current release. diff --git a/functional_tests/test_foundry_token_limit_defaults.py b/functional_tests/test_foundry_token_limit_defaults.py new file mode 100644 index 00000000..f96aff50 --- /dev/null +++ b/functional_tests/test_foundry_token_limit_defaults.py @@ -0,0 +1,286 @@ +# test_foundry_token_limit_defaults.py +#!/usr/bin/env python3 +""" +Functional test for Foundry token limit defaults and runtime forwarding. +Version: 0.241.005 +Implemented in: 0.241.005 + +This test ensures seeded agent defaults use model-native output limits and +that classic and new Foundry runtimes forward configured token caps. +""" + +import asyncio +import importlib +import sys +import types +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +SINGLE_APP_ROOT = ROOT / "application" / "single_app" + +sys.path.insert(0, str(SINGLE_APP_ROOT)) +sys.path.insert(0, str(ROOT)) + + +def assert_contains(file_path: Path, expected: str) -> None: + content = file_path.read_text(encoding="utf-8") + if expected not in content: + raise AssertionError(f"Expected to find {expected!r} in {file_path}") + + +def restore_modules(original_modules): + for module_name, original_module in original_modules.items(): + if original_module is None: + sys.modules.pop(module_name, None) + else: + sys.modules[module_name] = original_module + + +def load_foundry_agent_runtime_module(): + functions_appinsights_stub = types.ModuleType("functions_appinsights") + functions_appinsights_stub.log_event = lambda *args, **kwargs: None + + functions_debug_stub = types.ModuleType("functions_debug") + functions_debug_stub.debug_print = lambda *args, **kwargs: None + + functions_keyvault_stub = types.ModuleType("functions_keyvault") + functions_keyvault_stub.retrieve_secret_from_key_vault_by_full_name = lambda value: value + 
functions_keyvault_stub.validate_secret_name_dynamic = lambda value: False + + requests_stub = types.ModuleType("requests") + requests_stub.last_post_args = None + requests_stub.last_post_kwargs = None + + class StubResponse: + def __init__(self, payload, status_code=200, headers=None, text=""): + self._payload = payload + self.status_code = status_code + self.headers = headers or {"Content-Type": "application/json"} + self.text = text + + def json(self): + return self._payload + + def close(self): + return None + + def post(*args, **kwargs): + requests_stub.last_post_args = args + requests_stub.last_post_kwargs = kwargs + return StubResponse( + { + "id": "resp-123", + "model": "gpt-5.4", + "output": [ + { + "type": "message", + "content": [ + { + "type": "output_text", + "text": "new foundry result", + } + ], + } + ], + } + ) + + requests_stub.Response = StubResponse + requests_stub.get = lambda *args, **kwargs: None + requests_stub.post = post + + azure_stub = types.ModuleType("azure") + azure_identity_stub = types.ModuleType("azure.identity") + azure_identity_aio_stub = types.ModuleType("azure.identity.aio") + + class Token: + def __init__(self, value): + self.token = value + + class SyncDefaultAzureCredential: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def get_token(self, scope): + return Token(f"sync:{scope}") + + def close(self): + return None + + class SyncClientSecretCredential(SyncDefaultAzureCredential): + pass + + class AsyncDefaultAzureCredential: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + async def get_token(self, scope): + return Token(f"async:{scope}") + + async def close(self): + return None + + class AsyncClientSecretCredential(AsyncDefaultAzureCredential): + pass + + class AzureAuthorityHosts: + AZURE_PUBLIC_CLOUD = "public" + AZURE_GOVERNMENT = "government" + + azure_identity_stub.AzureAuthorityHosts = AzureAuthorityHosts + 
azure_identity_stub.ClientSecretCredential = SyncClientSecretCredential + azure_identity_stub.DefaultAzureCredential = SyncDefaultAzureCredential + azure_identity_aio_stub.ClientSecretCredential = AsyncClientSecretCredential + azure_identity_aio_stub.DefaultAzureCredential = AsyncDefaultAzureCredential + + semantic_kernel_stub = types.ModuleType("semantic_kernel") + semantic_kernel_agents_stub = types.ModuleType("semantic_kernel.agents") + semantic_kernel_contents_stub = types.ModuleType("semantic_kernel.contents") + semantic_kernel_chat_stub = types.ModuleType("semantic_kernel.contents.chat_message_content") + + class StubAgentsOperations: + async def get_agent(self, agent_id): + return types.SimpleNamespace(model={"id": "gpt-5.4"}, agent_id=agent_id) + + async def delete_thread(self, thread_id): + return None + + class StubClient: + def __init__(self): + self.agents = StubAgentsOperations() + + async def close(self): + return None + + async def _delete_thread(): + return None + + class ChatMessageContent: + def __init__(self, content="", role="user", metadata=None): + self.content = content + self.role = role + self.metadata = metadata or {} + self.items = [] + + class AzureAIAgent: + last_invoke_kwargs = None + + def __init__(self, client=None, definition=None): + self.client = client + self.definition = definition + + @staticmethod + def create_client(*args, **kwargs): + return StubClient() + + async def invoke(self, **kwargs): + AzureAIAgent.last_invoke_kwargs = kwargs + message = ChatMessageContent(content="classic foundry result", metadata={}) + thread = types.SimpleNamespace(id="thread-123", delete=_delete_thread) + yield types.SimpleNamespace(thread=thread, message=message) + + semantic_kernel_agents_stub.AzureAIAgent = AzureAIAgent + semantic_kernel_chat_stub.ChatMessageContent = ChatMessageContent + + original_modules = {} + module_stubs = { + "functions_appinsights": functions_appinsights_stub, + "functions_debug": functions_debug_stub, + 
"functions_keyvault": functions_keyvault_stub, + "requests": requests_stub, + "azure": azure_stub, + "azure.identity": azure_identity_stub, + "azure.identity.aio": azure_identity_aio_stub, + "semantic_kernel": semantic_kernel_stub, + "semantic_kernel.agents": semantic_kernel_agents_stub, + "semantic_kernel.contents": semantic_kernel_contents_stub, + "semantic_kernel.contents.chat_message_content": semantic_kernel_chat_stub, + } + + for module_name, module_stub in module_stubs.items(): + original_modules[module_name] = sys.modules.get(module_name) + sys.modules[module_name] = module_stub + + original_modules["foundry_agent_runtime"] = sys.modules.get("foundry_agent_runtime") + sys.modules.pop("foundry_agent_runtime", None) + module = importlib.import_module("foundry_agent_runtime") + return module, original_modules, requests_stub, AzureAIAgent + + +def test_foundry_defaults_and_runtime_forwarding(): + """Seeded defaults should use -1 and Foundry runtimes should forward token caps.""" + print("๐Ÿ” Testing Foundry defaults and runtime token forwarding...") + + globals_path = ROOT / "application" / "single_app" / "functions_global_agents.py" + schema_path = ROOT / "application" / "single_app" / "static" / "json" / "schemas" / "agent.schema.json" + + assert_contains(globals_path, '"max_completion_tokens": -1') + assert_contains(schema_path, '"default": -1') + + module, original_modules, requests_stub, azure_ai_agent_cls = load_foundry_agent_runtime_module() + + try: + message_history = [module.ChatMessageContent(content="Hello Foundry")] + + classic_result = asyncio.run( + module.execute_foundry_agent( + foundry_settings={ + "agent_id": "agent-123", + "endpoint": "https://example.services.ai.azure.com", + }, + global_settings={}, + message_history=message_history, + metadata={"conversation_id": "conv-1"}, + max_completion_tokens=4096, + ) + ) + + assert azure_ai_agent_cls.last_invoke_kwargs is not None + assert 
azure_ai_agent_cls.last_invoke_kwargs.get("max_completion_tokens") == 4096 + assert classic_result.message == "classic foundry result" + + payload_without_limit = module._build_new_foundry_request_payload( + message_history, + {"conversation_id": "conv-1"}, + stream=False, + ) + assert "max_output_tokens" not in payload_without_limit + + new_result = asyncio.run( + module.execute_new_foundry_agent( + foundry_settings={ + "application_name": "test-app", + "endpoint": "https://example.services.ai.azure.com", + "responses_api_version": "2025-11-15-preview", + }, + global_settings={}, + message_history=message_history, + metadata={"conversation_id": "conv-2"}, + max_completion_tokens=8192, + ) + ) + + assert requests_stub.last_post_kwargs is not None + assert requests_stub.last_post_kwargs["json"].get("max_output_tokens") == 8192 + assert new_result.message == "new foundry result" + finally: + restore_modules(original_modules) + + print("✅ Foundry defaults and runtime token forwarding verified.") + + +if __name__ == "__main__": + success = True + try: + test_foundry_defaults_and_runtime_forwarding() + except Exception as exc: + print(f"❌ Test failed: {exc}") + import traceback + + traceback.print_exc() + success = False + + raise SystemExit(0 if success else 1) \ No newline at end of file diff --git a/functional_tests/test_tabular_entity_lookup_mode.py b/functional_tests/test_tabular_entity_lookup_mode.py index 990513fb..d453dabe 100644 --- a/functional_tests/test_tabular_entity_lookup_mode.py +++ b/functional_tests/test_tabular_entity_lookup_mode.py @@ -205,7 +205,7 @@ def test_entity_lookup_primary_sheet_hint_prefers_anchor_entity_sheet(): assert likely_sheet == 'Taxpayers', likely_sheet assert relevant_sheets[0] == 'Taxpayers', relevant_sheets assert relevant_sheets.index('Taxpayers') < relevant_sheets.index('Notices'), relevant_sheets - assert 'begin with filter_rows or query_tabular_data without sheet_name so the plugin can perform a cross-sheet discovery
search' in route_content, route_content + assert 'begin with search_rows, filter_rows, or query_tabular_data without sheet_name so the plugin can perform a cross-sheet discovery search' in route_content, route_content assert 'Do not start with aggregate_column, group_by_aggregate, or group_by_datetime_component until you have located the relevant entity rows.' in route_content, route_content print('✅ Entity-lookup primary worksheet hinting passed') diff --git a/functional_tests/test_tabular_exhaustive_result_synthesis_fix.py b/functional_tests/test_tabular_exhaustive_result_synthesis_fix.py new file mode 100644 index 00000000..564e6ec7 --- /dev/null +++ b/functional_tests/test_tabular_exhaustive_result_synthesis_fix.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +# test_tabular_exhaustive_result_synthesis_fix.py +""" +Functional test for tabular exhaustive-result synthesis retry. +Version: 0.241.006 +Implemented in: 0.241.006 + +This test ensures exhaustive tabular requests retry when successful analytical +tool calls already returned the full matching result set or only a partial +slice, but the synthesis response still behaves as though only schema samples +are available.
+""" + +import ast +import json +import os +import sys +from types import SimpleNamespace + + +ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(ROOT_DIR) +sys.path.append(os.path.join(ROOT_DIR, 'application', 'single_app')) + +ROUTE_FILE = os.path.join(ROOT_DIR, 'application', 'single_app', 'route_backend_chats.py') +TARGET_FUNCTIONS = { + 'question_requests_tabular_exhaustive_results', + 'parse_tabular_result_count', + 'get_tabular_invocation_result_payload', + 'is_tabular_access_limited_analysis', + 'get_tabular_result_coverage_summary', + 'build_tabular_success_execution_gap_messages', +} + + +def load_helpers(): + """Load the targeted tabular retry helpers from the route source.""" + with open(ROUTE_FILE, 'r', encoding='utf-8') as file_handle: + route_content = file_handle.read() + + parsed = ast.parse(route_content, filename=ROUTE_FILE) + selected_nodes = [] + for node in parsed.body: + if isinstance(node, ast.FunctionDef) and node.name in TARGET_FUNCTIONS: + selected_nodes.append(node) + + module = ast.Module(body=selected_nodes, type_ignores=[]) + namespace = { + 'json': json, + 're': __import__('re'), + } + exec(compile(module, ROUTE_FILE, 'exec'), namespace) + return namespace, route_content + + +def test_exhaustive_tabular_retry_detects_full_result_access_gap(): + """Verify full-result tool coverage forces a retry when synthesis claims sample-only access.""" + print('๐Ÿ” Testing exhaustive tabular retry for full-result access gaps...') + + try: + helpers, route_content = load_helpers() + wants_exhaustive_results = helpers['question_requests_tabular_exhaustive_results'] + is_access_limited_analysis = helpers['is_tabular_access_limited_analysis'] + get_tabular_result_coverage_summary = helpers['get_tabular_result_coverage_summary'] + build_execution_gap_messages = helpers['build_tabular_success_execution_gap_messages'] + + user_question = 'list out all of the security controls' + access_limited_analysis = ( + 'The 
workbook contains 1,189 controls and control enhancements in NIST SP 800-53 Rev. 5, ' + 'but the data provided here does not include the full 1,189-item list, only sample rows ' + 'and workbook metadata. So I cannot accurately list all of them from the current evidence.' + ) + invocations = [ + SimpleNamespace( + function_name='query_tabular_data', + parameters={ + 'filename': 'sp800-53r5-control-catalog.xlsx', + 'max_rows': '1189', + 'query_expression': '`Control Identifier` == `Control Identifier`', + }, + result=json.dumps({ + 'filename': 'sp800-53r5-control-catalog.xlsx', + 'selected_sheet': 'SP 800-53 Revision 5', + 'total_matches': 1189, + 'returned_rows': 1189, + 'data': [ + { + 'Control Identifier': 'AC-1', + 'Control (or Control Enhancement) Name': 'Policy and Procedures', + }, + { + 'Control Identifier': 'AC-2', + 'Control (or Control Enhancement) Name': 'Account Management', + }, + ], + }), + error_message=None, + ) + ] + + coverage_summary = get_tabular_result_coverage_summary(invocations) + execution_gap_messages = build_execution_gap_messages( + user_question, + access_limited_analysis, + invocations, + ) + + assert wants_exhaustive_results(user_question), user_question + assert is_access_limited_analysis(access_limited_analysis), access_limited_analysis + assert coverage_summary['has_full_result_coverage'] is True, coverage_summary + assert coverage_summary['has_partial_result_coverage'] is False, coverage_summary + assert any('full matching result set' in message for message in execution_gap_messages), execution_gap_messages + assert any('list the full results the user asked for' in message for message in execution_gap_messages), execution_gap_messages + assert 'Do not claim that only sample rows or workbook metadata are available in that case.' 
in route_content, route_content + + print('✅ Exhaustive tabular retry for full-result access gaps passed') + return True + + except Exception as exc: + print(f'❌ Test failed: {exc}') + import traceback + traceback.print_exc() + return False + + +def test_exhaustive_tabular_retry_detects_partial_result_slice(): + """Verify exhaustive requests trigger a rerun when analytical tools only returned a partial slice.""" + print('🔍 Testing exhaustive tabular retry for partial result slices...') + + try: + helpers, _ = load_helpers() + get_tabular_result_coverage_summary = helpers['get_tabular_result_coverage_summary'] + build_execution_gap_messages = helpers['build_tabular_success_execution_gap_messages'] + + user_question = 'show me all of the matching security controls' + invocations = [ + SimpleNamespace( + function_name='query_tabular_data', + parameters={ + 'filename': 'sp800-53r5-control-catalog.xlsx', + 'max_rows': '100', + 'query_expression': '`Control Identifier` == `Control Identifier`', + }, + result=json.dumps({ + 'filename': 'sp800-53r5-control-catalog.xlsx', + 'selected_sheet': 'SP 800-53 Revision 5', + 'total_matches': 1189, + 'returned_rows': 100, + 'data': [ + { + 'Control Identifier': 'AC-1', + 'Control (or Control Enhancement) Name': 'Policy and Procedures', + } + ], + }), + error_message=None, + ) + ] + + coverage_summary = get_tabular_result_coverage_summary(invocations) + execution_gap_messages = build_execution_gap_messages( + user_question, + 'Here is a representative sample of the matching controls.', + invocations, + ) + + assert coverage_summary['has_full_result_coverage'] is False, coverage_summary + assert coverage_summary['has_partial_result_coverage'] is True, coverage_summary + assert any('higher max_rows or max_values' in message for message in execution_gap_messages), execution_gap_messages + + print('✅ Exhaustive tabular retry for partial result slices passed') + return True + + except Exception as exc: + print(f'❌ Test failed:
{exc}') + import traceback + traceback.print_exc() + return False + + +if __name__ == '__main__': + tests = [ + test_exhaustive_tabular_retry_detects_full_result_access_gap, + test_exhaustive_tabular_retry_detects_partial_result_slice, + ] + results = [] + + for test in tests: + print(f'\n🧪 Running {test.__name__}...') + results.append(test()) + + success = all(results) + print(f'\n📊 Results: {sum(results)}/{len(results)} tests passed') + sys.exit(0 if success else 1) \ No newline at end of file